diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index a53fc1a..13cb4f0 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -636,9 +636,20 @@ minillaplocal.query.files=acid_globallimit.q,\ vector_join_filters.q,\ vector_leftsemi_mapjoin.q,\ vector_number_compare_projection.q,\ + vector_outer_reference_windowed.q,\ vector_partitioned_date_time.q,\ vector_ptf_part_simple.q,\ vector_udf1.q,\ + vector_windowing_expressions.q,\ + vector_windowing_gby.q,\ + vector_windowing_gby2.q,\ + vector_windowing_multipartitioning.q,\ + vector_windowing_order_null.q,\ + vector_windowing_range_multiorder.q,\ + vector_windowing_rank.q,\ + vector_windowing_streaming.q,\ + vector_windowing_windowspec.q,\ + vector_windowing_windowspec4.q,\ vectorization_short_regress.q,\ vectorized_dynamic_partition_pruning.q,\ vectorized_dynamic_semijoin_reduction.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 8b04cd4..ac35f91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -632,6 +632,13 @@ public void endGroup() throws HiveException { defaultEndGroup(); } + // Tell the operator the status of the next key-grouped VectorizedRowBatch that will be delivered + // to the process method. E.g. by reduce-shuffle. These semantics are needed by PTF so it can + // efficiently add computed values to the last batch of a group key. + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + // Do nothing. + } + // an blocking operator (e.g. GroupByOperator and JoinOperator) can // override this method to forward its outputs public void flush() throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index afe1484..c5a4217 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc; import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator; @@ -139,6 +140,7 @@ vectorOpvec.put(FileSinkDesc.class, VectorFileSinkOperator.class); vectorOpvec.put(FilterDesc.class, VectorFilterOperator.class); vectorOpvec.put(LimitDesc.class, VectorLimitOperator.class); + vectorOpvec.put(PTFDesc.class, VectorPTFOperator.class); vectorOpvec.put(SparkHashTableSinkDesc.class, VectorSparkHashTableSinkOperator.class); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 60660ac..548f1fc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -379,20 +379,6 @@ private boolean pushRecordVector() { BytesWritable keyWritable = (BytesWritable) 
reader.getCurrentKey(); valueWritables = reader.getCurrentValues(); - // Check if this is a new group or same group - if (handleGroupKey && !keyWritable.equals(this.groupKey)) { - // If a operator wants to do some work at the beginning of a group - if (groupKey == null) { // the first group - this.groupKey = new BytesWritable(); - } else { - // If a operator wants to do some work at the end of a group - reducer.endGroup(); - } - - groupKey.set(keyWritable.getBytes(), 0, keyWritable.getLength()); - reducer.startGroup(); - } - processVectorGroup(keyWritable, valueWritables, tag); return true; } catch (Throwable e) { @@ -408,15 +394,20 @@ } /** + * Process the rows of one reduce-shuffle key group, sending them to the reducer in one or more + * key-grouped VectorizedRowBatches. + * + * @param keyWritable the serialized group key shared by all the rows * @param values - * @return true if it is not done and can take more inputs + * @param tag the operator input tag + * @throws HiveException + * @throws IOException */ private void processVectorGroup(BytesWritable keyWritable, Iterable values, byte tag) throws HiveException, IOException { + Preconditions.checkState(batch.size == 0); + // Deserialize key into vector row columns. - // Since we referencing byte column vector byte arrays by reference, we don't need - // a data buffer. + // The key bytes are referenced (not copied) by the batch's bytes column vectors, so they are + // counted in batchBytes below. byte[] keyBytes = keyWritable.getBytes(); int keyLength = keyWritable.getLength(); @@ -442,6 +433,24 @@ private void processVectorGroup(BytesWritable keyWritable, int batchBytes = keyBytes.length; try { for (Object value : values) { + if (rowIdx >= maxSize || + (rowIdx > 0 && batchBytes >= BATCH_BYTES)) { + + // Batch is full AND we have at least 1 more row... + batch.size = rowIdx; + if (handleGroupKey) { + reducer.setNextVectorBatchGroupStatus(/* isLastGroupBatch */ false); + } + reducer.process(batch, tag); + + // Reset just the value columns and value buffer. + for (int i = firstValueColumnOffset; i < batch.numCols; i++) { + // Note that reset also resets the data buffer for bytes column vectors. + batch.cols[i].reset(); + } + rowIdx = 0; + batchBytes = keyBytes.length; + } if (valueLazyBinaryDeserializeToRow != null) { // Deserialize value into vector row columns. BytesWritable valueWritable = (BytesWritable) value; @@ -456,24 +465,13 @@ private void processVectorGroup(BytesWritable keyWritable, valueLazyBinaryDeserializeToRow.deserialize(batch, rowIdx); } rowIdx++; - if (rowIdx >= maxSize || batchBytes >= BATCH_BYTES) { - - // Batch is full. - batch.size = rowIdx; - reducer.process(batch, tag); - - // Reset just the value columns and value buffer. - for (int i = firstValueColumnOffset; i < batch.numCols; i++) { - // Note that reset also resets the data buffer for bytes column vectors. - batch.cols[i].reset(); - } - rowIdx = 0; - batchBytes = 0; - } } if (rowIdx > 0) { // Flush final partial batch.
- VectorizedBatchUtil.setBatchSize(batch, rowIdx); + batch.size = rowIdx; + if (handleGroupKey) { + reducer.setNextVectorBatchGroupStatus(/* isLastGroupBatch */ true); + } reducer.process(batch, tag); } batch.reset(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 5b4c7c3..e3c0a02 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -148,8 +148,7 @@ */ private static interface IProcessingMode { public void initialize(Configuration hconf) throws HiveException; - public void startGroup() throws HiveException; - public void endGroup() throws HiveException; + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException; public void processBatch(VectorizedRowBatch batch) throws HiveException; public void close(boolean aborted) throws HiveException; } @@ -159,14 +158,10 @@ */ private abstract class ProcessingModeBase implements IProcessingMode { - // Overridden and used in sorted reduce group batch processing mode. + // Overridden and used in ProcessingModeReduceMergePartial mode. @Override - public void startGroup() throws HiveException { - // Do nothing. - } - @Override - public void endGroup() throws HiveException { - // Do nothing. + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + throw new HiveException("Status call for next key-grouped VectorizedRowBatch not expected for ProcessingMode class " + this.getClass().getName()); } protected abstract void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, @@ -258,6 +253,11 @@ public void initialize(Configuration hconf) throws HiveException { } @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + // Do nothing. + } + + @Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { for (int i = 0; i < aggregators.length; ++i) { @@ -682,6 +682,11 @@ public void free(VectorAggregationBufferRow t) { } @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + // Do nothing. + } + + @Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { @@ -770,8 +775,8 @@ public void close(boolean aborted) throws HiveException { */ private class ProcessingModeReduceMergePartial extends ProcessingModeBase { - private boolean inGroup; private boolean first; + private boolean isLastGroupBatch; /** * The group vector key helper. */ @@ -790,7 +795,7 @@ public void close(boolean aborted) throws HiveException { @Override public void initialize(Configuration hconf) throws HiveException { - inGroup = false; + isLastGroupBatch = true; // We do not include the dummy grouping set column in the output.
So we pass outputKeyLength // instead of keyExpressions.length @@ -802,24 +807,18 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void startGroup() throws HiveException { - inGroup = true; - first = true; - } - - @Override - public void endGroup() throws HiveException { - if (inGroup && !first) { - writeGroupRow(groupAggregators, buffer); - groupAggregators.reset(); + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + if (this.isLastGroupBatch) { + // Previous batch was the last of a group of batches. Remember that the next batch is the + // first batch of a new group of batches. + first = true; } - inGroup = false; + this.isLastGroupBatch = isLastGroupBatch; } @Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { - assert(inGroup); if (first) { // Copy the group key to output batch now. We'll copy in the aggregates at the end of the group. first = false; @@ -836,11 +835,16 @@ public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(groupAggregators.getAggregationBuffer(i), batch); } + + if (isLastGroupBatch) { + writeGroupRow(groupAggregators, buffer); + groupAggregators.reset(); + } } @Override public void close(boolean aborted) throws HiveException { - if (!aborted && inGroup && !first) { + if (!aborted && !first && !isLastGroupBatch) { writeGroupRow(groupAggregators, buffer); } } @@ -1011,21 +1015,26 @@ private void changeToStreamingMode() throws HiveException { } @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + processingMode.setNextVectorBatchGroupStatus(isLastGroupBatch); + } + + @Override public void startGroup() throws HiveException { - processingMode.startGroup(); // We do not call startGroup on operators below because we are batching rows in // an output batch and the semantics will not work. // super.startGroup(); + throw new HiveException("Unexpected startGroup"); } @Override public void endGroup() throws HiveException { - processingMode.endGroup(); // We do not call endGroup on operators below because we are batching rows in // an output batch and the semantics will not work. // super.endGroup(); + throw new HiveException("Unexpected endGroup"); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 5c490ef..992cbce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -110,6 +110,14 @@ protected void initializeOp(Configuration hconf) throws HiveException { outputFieldNames, objectInspectors); } + // Must send on to VectorPTFOperator...
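+ // VectorSelectOperator can sit between the reduce-shuffle source and VectorPTFOperator, so it + // relays the group-batch status to its children; otherwise the PTF operator would never learn + // which batch is the last one of a group key.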
+ @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + for (Operator op : childOperators) { + op.setNextVectorBatchGroupStatus(isLastGroupBatch); + } + } + @Override public void process(Object row, int tag) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index c3940cb..a9aba56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -406,7 +406,7 @@ protected boolean needsImplicitCastForDecimal(GenericUDF udf) { return udfsNeedingImplicitDecimalCast.contains(udfClass); } - protected int getInputColumnIndex(String name) throws HiveException { + public int getInputColumnIndex(String name) throws HiveException { if (name == null) { throw new HiveException("Null column name"); } @@ -438,7 +438,7 @@ protected OutputColumnManager(int initialOutputCol) { private final Set usedOutputColumns = new HashSet(); - int allocateOutputColumn(TypeInfo typeInfo) throws HiveException { + int allocateOutputColumn(TypeInfo typeInfo) { if (initialOutputCol < 0) { // This is a test calling. return 0; @@ -499,7 +499,7 @@ void freeOutputColumn(int index) { } } - public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException { + public int allocateScratchColumn(TypeInfo typeInfo) { return ocm.allocateOutputColumn(typeInfo); } @@ -2635,7 +2635,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio } } - static String getScratchName(TypeInfo typeInfo) throws HiveException { + static String getScratchName(TypeInfo typeInfo) { // For now, leave DECIMAL precision/scale in the name so DecimalColumnVector scratch columns // don't need their precision/scale adjusted... 
if (typeInfo.getCategory() == Category.PRIMITIVE && diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 990e896..03c09e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -579,7 +579,7 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect return typeInfoList.toArray(new TypeInfo[0]); } - static ColumnVector cloneColumnVector(ColumnVector source + public static ColumnVector makeLikeColumnVector(ColumnVector source ) throws HiveException{ if (source instanceof LongColumnVector) { return new LongColumnVector(((LongColumnVector) source).vector.length); @@ -598,25 +598,25 @@ static ColumnVector cloneColumnVector(ColumnVector source return new IntervalDayTimeColumnVector(((IntervalDayTimeColumnVector) source).getLength()); } else if (source instanceof ListColumnVector) { ListColumnVector src = (ListColumnVector) source; - ColumnVector child = cloneColumnVector(src.child); + ColumnVector child = makeLikeColumnVector(src.child); return new ListColumnVector(src.offsets.length, child); } else if (source instanceof MapColumnVector) { MapColumnVector src = (MapColumnVector) source; - ColumnVector keys = cloneColumnVector(src.keys); - ColumnVector values = cloneColumnVector(src.values); + ColumnVector keys = makeLikeColumnVector(src.keys); + ColumnVector values = makeLikeColumnVector(src.values); return new MapColumnVector(src.offsets.length, keys, values); } else if (source instanceof StructColumnVector) { StructColumnVector src = (StructColumnVector) source; ColumnVector[] copy = new ColumnVector[src.fields.length]; for(int i=0; i < copy.length; ++i) { - copy[i] = cloneColumnVector(src.fields[i]); + copy[i] = makeLikeColumnVector(src.fields[i]); } return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, copy); } else if (source instanceof UnionColumnVector) { UnionColumnVector src = (UnionColumnVector) source; ColumnVector[] copy = new ColumnVector[src.fields.length]; for(int i=0; i < copy.length; ++i) { - copy[i] = cloneColumnVector(src.fields[i]); + copy[i] = makeLikeColumnVector(src.fields[i]); } return new UnionColumnVector(src.tags.length, copy); } else @@ -625,6 +625,53 @@ static ColumnVector cloneColumnVector(ColumnVector source " is not supported!"); } + public static void swapColumnVector( + VectorizedRowBatch batch1, int batch1ColumnNum, + VectorizedRowBatch batch2, int batch2ColumnNum) { + ColumnVector colVector1 = batch1.cols[batch1ColumnNum]; + batch1.cols[batch1ColumnNum] = batch2.cols[batch2ColumnNum]; + batch2.cols[batch2ColumnNum] = colVector1; + } + + public static void copyRepeatingColumn(VectorizedRowBatch sourceBatch, int sourceColumnNum, + VectorizedRowBatch targetBatch, int targetColumnNum, boolean setByValue) { + ColumnVector sourceColVector = sourceBatch.cols[sourceColumnNum]; + ColumnVector targetColVector = targetBatch.cols[targetColumnNum]; + + targetColVector.isRepeating = true; + + if (!sourceColVector.noNulls) { + targetColVector.noNulls = false; + targetColVector.isNull[0] = true; + return; + } + + if (sourceColVector instanceof LongColumnVector) { + ((LongColumnVector) targetColVector).vector[0] = ((LongColumnVector) sourceColVector).vector[0]; + } else if (sourceColVector instanceof DoubleColumnVector) { + ((DoubleColumnVector) targetColVector).vector[0] = ((DoubleColumnVector) 
sourceColVector).vector[0]; + } else if (sourceColVector instanceof BytesColumnVector) { + BytesColumnVector bytesColVector = (BytesColumnVector) sourceColVector; + byte[] bytes = bytesColVector.vector[0]; + final int start = bytesColVector.start[0]; + final int length = bytesColVector.length[0]; + if (setByValue) { + ((BytesColumnVector) targetColVector).setVal(0, bytes, start, length); + } else { + ((BytesColumnVector) targetColVector).setRef(0, bytes, start, length); + } + } else if (sourceColVector instanceof DecimalColumnVector) { + ((DecimalColumnVector) targetColVector).set(0, ((DecimalColumnVector) sourceColVector).vector[0]); + } else if (sourceColVector instanceof TimestampColumnVector) { + ((TimestampColumnVector) targetColVector).set(0, ((TimestampColumnVector) sourceColVector).asScratchTimestamp(0)); + } else if (sourceColVector instanceof IntervalDayTimeColumnVector) { + ((IntervalDayTimeColumnVector) targetColVector).set(0, ((IntervalDayTimeColumnVector) sourceColVector).asScratchIntervalDayTime(0)); + } else { + throw new RuntimeException("Column vector class " + sourceColVector.getClass().getName() + + " is not supported!"); + } + } + /** * Make a new (scratch) batch, which is exactly "like" the batch provided, except that it's empty * @param batch the batch to imitate @@ -635,7 +682,7 @@ public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveE VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols); for (int i = 0; i < batch.numCols; i++) { if (batch.cols[i] != null) { - newBatch.cols[i] = cloneColumnVector(batch.cols[i]); + newBatch.cols[i] = makeLikeColumnVector(batch.cols[i]); newBatch.cols[i].init(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java new file mode 100644 index 0000000..beca5f9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * This is the vector PTF evaluator base class. 
An evaluator does the group batch aggregation work + * on an aggregation's 0 or 1 argument(s) and at some point will fill in an output column with the + * aggregation result. The aggregation argument is an input column or expression, or no argument. + * + * When the aggregation is streaming (e.g. row_number, rank, first_value, etc.), the output column + * can be filled in immediately by the implementation of evaluateGroupBatch. + * + * For non-streaming aggregations, the aggregation result is not known until the last group batch + * is processed. After the last group batch has been processed, the VectorPTFGroupBatches class + * will call the isGroupResultNull, getResultColumnVectorType, getLongGroupResult | + * getDoubleGroupResult | getDecimalGroupResult, and getOutputColumnNum methods to get aggregation + * result information necessary to write it into the output column (as a repeated column) of all + * the group batches. + */ +public abstract class VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorBase.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected final WindowFrameDef windowFrameDef; + private final VectorExpression inputVecExpr; + protected final int inputColumnNum; + protected final int outputColumnNum; + + public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + this.windowFrameDef = windowFrameDef; + if (inputVecExpr == null) { + inputColumnNum = -1; + this.inputVecExpr = null; + } else { + inputColumnNum = inputVecExpr.getOutputColumn(); + if (inputVecExpr instanceof IdentityExpression) { + this.inputVecExpr = null; + } else { + this.inputVecExpr = inputVecExpr; + } + } + this.outputColumnNum = outputColumnNum; + } + + // Evaluate the aggregation input argument expression. + public void evaluateInputExpr(VectorizedRowBatch batch) { + if (inputVecExpr != null) { + inputVecExpr.evaluate(batch); + } + } + + // Evaluate the aggregation over one of the group's batches. + public abstract void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch); + + // Returns true if the aggregation result will be streamed. + public boolean streamsResult() { + // Assume it is not streaming by default. + return false; + } + + public int getOutputColumnNum() { + return outputColumnNum; + } + + // After processing all the group's batches with evaluateGroupBatch, is the non-streaming + // aggregation result null? + public boolean isGroupResultNull() { + return false; + } + + // What is the ColumnVector type of the aggregation result? + public abstract Type getResultColumnVectorType(); + + /* + * After processing all the non-streaming group's batches with evaluateGroupBatch, and when + * isGroupResultNull is false, these methods return the aggregation result value (based on + * getResultColumnVectorType). + */ + + public long getLongGroupResult() { + throw new RuntimeException("No long group result evaluator implementation " + this.getClass().getName()); + } + + public double getDoubleGroupResult() { + throw new RuntimeException("No double group result evaluator implementation " + this.getClass().getName()); + } + + public HiveDecimalWritable getDecimalGroupResult() { + throw new RuntimeException("No decimal group result evaluator implementation " + this.getClass().getName()); + } + + // Resets the aggregation calculation variable(s).
+ public abstract void resetEvaluator(); +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java new file mode 100644 index 0000000..638fc9e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates count(column) for a PTF group. + * + * Count any rows of the group where the input column/expression is non-null. + */ +public class VectorPTFEvaluatorCount extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorCount.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected long count; + + public VectorPTFEvaluatorCount(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Count non-null column rows. + + // We do not filter when PTF is in reducer. 
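+ // Reduce-side batches arrive dense (no selected[] filtering has been applied), which the + // checkState below asserts; the null scan can therefore walk indices 0..size-1 directly.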
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + ColumnVector colVector = batch.cols[inputColumnNum]; + if (colVector.isRepeating) { + if (colVector.noNulls) { + count += size; + } + } else if (colVector.noNulls) { + count += size; + } else { + boolean[] batchIsNull = colVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long varCount = 1; + i++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varCount++; + } + } + count += varCount; + } + } + + @Override + public boolean isGroupResultNull() { + return false; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupResult() { + return count; + } + + @Override + public void resetEvaluator() { + count = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java new file mode 100644 index 0000000..cf8e626 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates count(*) for a PTF group. + * + * Count all rows of the group. No input column/expression. + */ +public class VectorPTFEvaluatorCountStar extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorCountStar.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected long count; + + public VectorPTFEvaluatorCountStar(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + // No input expression for COUNT(*). + // evaluateInputExpr(batch); + + // Count all rows. 
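+ // count(*) includes NULL rows, so no column needs to be examined; batch.size alone suffices. + // Hypothetical example: group batches of sizes 1024 and 500 yield a group count of 1524.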
+ + count += batch.size; + } + + @Override + public boolean isGroupResultNull() { + return false; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupResult() { + return count; + } + + @Override + public void resetEvaluator() { + count = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java new file mode 100644 index 0000000..599e73b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal avg() for a PTF group. + * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorDecimalAvg extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalAvg.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected HiveDecimalWritable sum; + private int nonNullGroupCount; + private HiveDecimalWritable temp; + private HiveDecimalWritable avg; + + public VectorPTFEvaluatorDecimalAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + sum = new HiveDecimalWritable(); + temp = new HiveDecimalWritable(); + avg = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Sum all non-null decimal column values for avg; maintain isGroupResultNull; after last row of + // last group batch compute the group avg when sum is non-null. + + // We do not filter when PTF is in reducer. 
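+ // Hypothetical walk-through: a group split across batches [1.1, 2.2] and [3.3] accumulates + // sum 6.6 and nonNullGroupCount 3; avg 2.2 is only computed on the batch where + // isLastGroupBatch is true.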
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + + if (decimalColVector.noNulls) { + + // We have a repeated value. The sum increases by value * batch.size. + temp.setFromLong(batch.size); + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum.set(decimalColVector.vector[0]); + sum.mutateMultiply(temp); + isGroupResultNull = false; + } else { + temp.mutateMultiply(decimalColVector.vector[0]); + sum.mutateAdd(temp); + } + nonNullGroupCount += size; + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum.set(vector[0]); + isGroupResultNull = false; + } else { + sum.mutateAdd(vector[0]); + } + for (int i = 1; i < size; i++) { + sum.mutateAdd(vector[i]); + } + nonNullGroupCount += size; + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum.set(vector[i++]); + isGroupResultNull = false; + } else { + sum.mutateAdd(vector[i++]); + } + nonNullGroupCount++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum.mutateAdd(vector[i]); + nonNullGroupCount++; + } + } + } + + if (isLastGroupBatch) { + if (!isGroupResultNull) { + avg.set(sum); + temp.setFromLong(nonNullGroupCount); + avg.mutateDivide(temp); + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupResult() { + return avg; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + sum.set(HiveDecimal.ZERO); + nonNullGroupCount = 0; + avg.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java new file mode 100644 index 0000000..01a8c53 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal first_value() for a PTF group. + * + * We capture the first value from the first batch. It can be NULL. + * We then set (stream) the output column with that value as repeated in each batch. + */ +public class VectorPTFEvaluatorDecimalFirstValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalFirstValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean haveFirstValue; + protected boolean isGroupResultNull; + protected HiveDecimalWritable firstValue; + + public VectorPTFEvaluatorDecimalFirstValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + firstValue = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // First row determines isGroupResultNull and decimal firstValue; stream fill result as repeated. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!haveFirstValue) { + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + firstValue.set(decimalColVector.vector[0]); + isGroupResultNull = false; + } + } else if (decimalColVector.noNulls) { + firstValue.set(decimalColVector.vector[0]); + isGroupResultNull = false; + } else { + if (!decimalColVector.isNull[0]) { + firstValue.set(decimalColVector.vector[0]); + isGroupResultNull = false; + } + } + haveFirstValue = true; + } + + // First value is repeated for all batches. 
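+ // Because first_value streams its result, the same repeated value (or repeated NULL) is + // written into the output column of every batch of the group, immediately below, rather than + // deferred to VectorPTFGroupBatches.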
+ DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + outputColVector.isRepeating = true; + if (isGroupResultNull) { + outputColVector.noNulls = false; + outputColVector.isNull[0] = true; + } else { + outputColVector.noNulls = true; + outputColVector.isNull[0] = false; + outputColVector.vector[0].set(firstValue); + } + } + + public boolean streamsResult() { + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public void resetEvaluator() { + haveFirstValue = false; + isGroupResultNull = true; + firstValue.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java new file mode 100644 index 0000000..9ac79f5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal last_value() for a PTF group. + * + * We capture the last value from the last batch. It can be NULL. + * It becomes the group value. 
+ */ +public class VectorPTFEvaluatorDecimalLastValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalLastValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected HiveDecimalWritable lastValue; + + public VectorPTFEvaluatorDecimalLastValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + lastValue = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Last row of last batch determines isGroupResultNull and decimal lastValue. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!isLastGroupBatch) { + return; + } + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + lastValue.set(decimalColVector.vector[0]); + isGroupResultNull = false; + } + } else if (decimalColVector.noNulls) { + lastValue.set(decimalColVector.vector[size - 1]); + isGroupResultNull = false; + } else { + final int lastBatchIndex = size - 1; + if (!decimalColVector.isNull[lastBatchIndex]) { + lastValue.set(decimalColVector.vector[lastBatchIndex]); + isGroupResultNull = false; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupResult() { + return lastValue; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + lastValue.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java new file mode 100644 index 0000000..3c59268 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal max() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalMax extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalMax.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected HiveDecimalWritable max; + + public VectorPTFEvaluatorDecimalMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + max = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Determine maximum of all non-null decimal column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + if (isGroupResultNull) { + max.set(decimalColVector.vector[0]); + isGroupResultNull = false; + } else { + HiveDecimalWritable repeatedMax = decimalColVector.vector[0]; + if (repeatedMax.compareTo(max) == 1) { + max.set(repeatedMax); + } + } + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + max.set(vector[0]); + isGroupResultNull = false; + } else { + final HiveDecimalWritable dec = vector[0]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + for (int i = 1; i < size; i++) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + max.set(vector[i++]); + isGroupResultNull = false; + } else { + final HiveDecimalWritable dec = vector[i++]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + for (; i < size; i++) { + if (!batchIsNull[i]) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupResult() { + return max; + } + + private static HiveDecimal MIN_VALUE = HiveDecimal.create("-99999999999999999999999999999999999999"); + + @Override + public void 
resetEvaluator() { + isGroupResultNull = true; + max.set(MIN_VALUE); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java new file mode 100644 index 0000000..0f7ea04 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal min() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalMin extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalMin.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected HiveDecimalWritable min; + + public VectorPTFEvaluatorDecimalMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + min = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Determine minimum of all non-null decimal column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. 
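+ // Hypothetical walk-through: values 3.2, NULL, 1.5 across the group's batches leave + // min = 1.5; a group of all NULLs leaves isGroupResultNull true.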
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + if (isGroupResultNull) { + min.set(decimalColVector.vector[0]); + isGroupResultNull = false; + } else { + HiveDecimalWritable repeatedMin = decimalColVector.vector[0]; + if (repeatedMin.compareTo(min) == -1) { + min.set(repeatedMin); + } + } + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + min.set(vector[0]); + isGroupResultNull = false; + } else { + final HiveDecimalWritable dec = vector[0]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + for (int i = 1; i < size; i++) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + min.set(vector[i++]); + isGroupResultNull = false; + } else { + final HiveDecimalWritable dec = vector[i++]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + for (; i < size; i++) { + if (!batchIsNull[i]) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupResult() { + return min; + } + + private static HiveDecimal MAX_VALUE = HiveDecimal.create("99999999999999999999999999999999999999"); + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + min.set(MAX_VALUE); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java new file mode 100644 index 0000000..8300781 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal sum() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalSum extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalSum.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected HiveDecimalWritable sum; + protected HiveDecimalWritable temp; + + public VectorPTFEvaluatorDecimalSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + sum = new HiveDecimalWritable(); + temp = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Sum all non-null decimal column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + + if (decimalColVector.noNulls) { + temp.setFromLong(batch.size); + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum.set(decimalColVector.vector[0]); + sum.mutateMultiply(temp); + isGroupResultNull = false; + } else { + temp.mutateMultiply(decimalColVector.vector[0]); + sum.mutateAdd(temp); + } + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum.set(vector[0]); + isGroupResultNull = false; + } else { + sum.mutateAdd(vector[0]); + } + for (int i = 1; i < size; i++) { + sum.mutateAdd(vector[i]); + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isGroupResultNull) { + + // First aggregation calculation for group. 
+ sum.set(vector[i++]); + isGroupResultNull = false; + } else { + sum.mutateAdd(vector[i++]); + } + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum.mutateAdd(vector[i]); + } + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupResult() { + return sum; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + sum.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java new file mode 100644 index 0000000..62f7aa5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +/** + * This class evaluates dense_rank() for a PTF group. + * + * Dense rank starts at 1; the same dense rank is streamed to the output column as repeated; after + * the last group row, the dense rank is incremented by 1. + */ +public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDenseRank.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private int denseRank; + + public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; + longColVector.isRepeating = true; + longColVector.noNulls = true; + longColVector.isNull[0] = false; + longColVector.vector[0] = denseRank; + + if (isLastGroupBatch) { + denseRank++; + } + } + + public boolean streamsResult() { + // No group value.
+ return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + denseRank = 1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java new file mode 100644 index 0000000..2c379d7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double avg() for a PTF group. + * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorDoubleAvg extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleAvg.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected double sum; + private int nonNullGroupCount; + private double avg; + + public VectorPTFEvaluatorDoubleAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Sum all non-null double column values for avg; maintain isGroupResultNull; after last row of + // last group batch compute the group avg when sum is non-null. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls) { + + // We have a repeated value. The sum increases by value * batch.size. + if (isGroupResultNull) { + + // First aggregation calculation for group. 
+ sum = doubleColVector.vector[0] * batch.size; + isGroupResultNull = false; + } else { + sum += doubleColVector.vector[0] * batch.size; + } + nonNullGroupCount += size; + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + nonNullGroupCount += size; + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varSum = vector[i++]; + nonNullGroupCount++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + nonNullGroupCount++; + } + } + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } + + if (isLastGroupBatch) { + if (!isGroupResultNull) { + avg = sum / nonNullGroupCount; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupResult() { + return avg; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + sum = 0.0; + nonNullGroupCount = 0; + avg = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java new file mode 100644 index 0000000..f9d819d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double first_value() for a PTF group. + * + * We capture the first value from the first batch. It can be NULL. + * We then set (stream) the output column with that value as repeated in each batch. 
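[Reviewer note] The avg evaluators above are two-phase: every batch of a group folds into sum and nonNullGroupCount, and the division happens exactly once, when isLastGroupBatch arrives; a repeating batch is folded with a single multiply (value * batch.size) instead of per-row adds — the decimal sum variant does the same via temp.setFromLong and mutateMultiply. A rough standalone model of that lifecycle follows (hypothetical names, nulls omitted for brevity, not the Hive API):

    public class AvgLifecycleSketch {

      static double sum;
      static int nonNullCount;
      static boolean resultNull = true;
      static double avg;

      // One call per batch of the current group; 'repeatedValue' models an
      // isRepeating batch, 'values' a plain one.
      static void addRepeatedBatch(double repeatedValue, int batchSize, boolean isLastGroupBatch) {
        sum += repeatedValue * batchSize;   // one multiply instead of batchSize adds
        nonNullCount += batchSize;
        resultNull = false;
        maybeFinish(isLastGroupBatch);
      }

      static void addBatch(double[] values, boolean isLastGroupBatch) {
        for (double v : values) {
          sum += v;
        }
        nonNullCount += values.length;
        resultNull = false;
        maybeFinish(isLastGroupBatch);
      }

      static void maybeFinish(boolean isLastGroupBatch) {
        if (isLastGroupBatch && !resultNull) {
          avg = sum / nonNullCount;   // finalize once per group
        }
      }

      public static void main(String[] args) {
        addBatch(new double[] { 1.0, 2.0 }, false);   // first batch of the group
        addRepeatedBatch(3.0, 2, true);               // last batch: repeated 3.0 x 2
        System.out.println(avg);                      // (1 + 2 + 3 + 3) / 4 = 2.25
      }
    }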
+ */ +public class VectorPTFEvaluatorDoubleFirstValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleFirstValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean haveFirstValue; + protected boolean isGroupResultNull; + protected double firstValue; + + public VectorPTFEvaluatorDoubleFirstValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // First row determines isGroupResultNull and double firstValue; stream fill result as repeated. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!haveFirstValue) { + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + firstValue = doubleColVector.vector[0]; + isGroupResultNull = false; + } + } else if (doubleColVector.noNulls) { + firstValue = doubleColVector.vector[0]; + isGroupResultNull = false; + } else { + if (!doubleColVector.isNull[0]) { + firstValue = doubleColVector.vector[0]; + isGroupResultNull = false; + } + } + haveFirstValue = true; + } + + // First value is repeated for all batches. + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + outputColVector.isRepeating = true; + if (isGroupResultNull) { + outputColVector.noNulls = false; + outputColVector.isNull[0] = true; + } else { + outputColVector.noNulls = true; + outputColVector.isNull[0] = false; + outputColVector.vector[0] = firstValue; + } + } + + public boolean streamsResult() { + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + haveFirstValue = false; + isGroupResultNull = true; + firstValue = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java new file mode 100644 index 0000000..f20e4d9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
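[Reviewer note] first_value above is a streaming evaluator: it latches the value (or NULL) of the group's first row once, then marks the output column isRepeating in every batch of the group, so downstream operators see a constant column without per-row writes. A small self-contained sketch of that repeated-output protocol — the OutColumn class is a made-up stand-in for Hive's DoubleColumnVector:

    public class RepeatedOutputSketch {

      // Hypothetical mini column: mirrors the isRepeating/noNulls/isNull[0] protocol
      // the evaluator applies to its output column.
      static class OutColumn {
        boolean isRepeating;
        boolean noNulls;
        boolean[] isNull = new boolean[1024];
        double[] vector = new double[1024];
      }

      static void fillRepeated(OutColumn out, boolean valueIsNull, double value) {
        out.isRepeating = true;           // entry 0 stands for every row in the batch
        if (valueIsNull) {
          out.noNulls = false;
          out.isNull[0] = true;
        } else {
          out.noNulls = true;
          out.isNull[0] = false;
          out.vector[0] = value;
        }
      }

      public static void main(String[] args) {
        OutColumn out = new OutColumn();
        fillRepeated(out, false, 42.5);   // first_value captured from the group's first row
        System.out.println(out.isRepeating + " " + out.vector[0]);
      }
    }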
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double last_value() for a PTF group. + * + * We capture the last value from the last batch. It can be NULL. + * It becomes the group value. + */ +public class VectorPTFEvaluatorDoubleLastValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleLastValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected double lastValue; + + public VectorPTFEvaluatorDoubleLastValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Last row of last batch determines isGroupResultNull and double lastValue. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!isLastGroupBatch) { + return; + } + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + lastValue = doubleColVector.vector[0]; + isGroupResultNull = false; + } + } else if (doubleColVector.noNulls) { + lastValue = doubleColVector.vector[size - 1]; + isGroupResultNull = false; + } else { + final int lastBatchIndex = size - 1; + if (!doubleColVector.isNull[lastBatchIndex]) { + lastValue = doubleColVector.vector[lastBatchIndex]; + isGroupResultNull = false; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupResult() { + return lastValue; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + lastValue = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java new file mode 100644 index 0000000..3210e18 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
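[Reviewer note] last_value, by contrast, ignores every batch until isLastGroupBatch and then reads only the final row of that batch; nothing is streamed, so the operator fills the output column afterwards from the group result. A toy model of just that decision rule — names are illustrative, and returning null here conflates "no decision yet" with a NULL result, which the real evaluator keeps separate via isGroupResultNull:

    public class LastValueSketch {

      // Model of the last_value rule: ignore every batch except the group's last,
      // then take the final row (or null) of that batch.
      static Double lastValue(double[] batchValues, boolean[] batchIsNull, boolean isLastGroupBatch) {
        if (!isLastGroupBatch || batchValues.length == 0) {
          return null;   // nothing decided yet; earlier batches never matter
        }
        int last = batchValues.length - 1;
        return batchIsNull[last] ? null : batchValues[last];
      }

      public static void main(String[] args) {
        System.out.println(lastValue(new double[] { 1.5, 2.5 }, new boolean[2], true)); // 2.5
      }
    }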
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double max() for a PTF group. + */ +public class VectorPTFEvaluatorDoubleMax extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleMax.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected double max; + + public VectorPTFEvaluatorDoubleMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Determine maximum of all non-null double column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + if (isGroupResultNull) { + max = doubleColVector.vector[0]; + isGroupResultNull = false; + } else { + final double repeatedMax = doubleColVector.vector[0]; + if (repeatedMax > max) { + max = repeatedMax; + } + } + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varMax = vector[0]; + for (int i = 1; i < size; i++) { + final double d = vector[i]; + if (d > varMax) { + varMax = d; + } + } + if (isGroupResultNull) { + max = varMax; + isGroupResultNull = false; + } else if (varMax > max) { + max = varMax; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varMax = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double d = vector[i]; + if (d > varMax) { + varMax = d; + } + } + } + if (isGroupResultNull) { + max = varMax; + isGroupResultNull = false; + } else if (varMax > max) { + max = varMax; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupResult() { + return max; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + max = Double.MIN_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java new file mode 100644 index 0000000..d5a35ff --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double min() for a PTF group.
+ */ +public class VectorPTFEvaluatorDoubleMin extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleMin.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected double min; + + public VectorPTFEvaluatorDoubleMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Determine minimum of all non-null double column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + if (isGroupResultNull) { + min = doubleColVector.vector[0]; + isGroupResultNull = false; + } else { + final double repeatedMin = doubleColVector.vector[0]; + if (repeatedMin < min) { + min = repeatedMin; + } + } + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varMin = vector[0]; + for (int i = 1; i < size; i++) { + final double d = vector[i]; + if (d < varMin) { + varMin = d; + } + } + if (isGroupResultNull) { + min = varMin; + isGroupResultNull = false; + } else if (varMin < min) { + min = varMin; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varMin = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double d = vector[i]; + if (d < varMin) { + varMin = d; + } + } + } + if (isGroupResultNull) { + min = varMin; + isGroupResultNull = false; + } else if (varMin < min) { + min = varMin; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupResult() { + return min; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + min = Double.MAX_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java new file mode 100644 index 0000000..45cc0ca --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double sum() for a PTF group. + */ +public class VectorPTFEvaluatorDoubleSum extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleSum.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected double sum; + + public VectorPTFEvaluatorDoubleSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Sum all non-null double column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls) { + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = doubleColVector.vector[0] * batch.size; + isGroupResultNull = false; + } else { + sum += doubleColVector.vector[0] * batch.size; + } + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varSum = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + } + } + if (isGroupResultNull) { + + // First aggregation calculation for group. 
+ sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupResult() { + return sum; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + sum = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java new file mode 100644 index 0000000..2bfc12e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long avg() for a PTF group. + * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorLongAvg extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongAvg.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected long sum; + private int nonNullGroupCount; + private double avg; + + public VectorPTFEvaluatorLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Sum all non-null long column values for avg; maintain isGroupResultNull; after last row of + // last group batch compute the group avg when sum is non-null. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + + if (longColVector.noNulls) { + + // We have a repeated value. The sum increases by value * batch.size. 
+ if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = longColVector.vector[0] * batch.size; + isGroupResultNull = false; + } else { + sum += longColVector.vector[0] * batch.size; + } + nonNullGroupCount += size; + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + nonNullGroupCount += size; + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varSum = vector[i++]; + nonNullGroupCount++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + nonNullGroupCount++; + } + } + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } + + if (isLastGroupBatch) { + if (!isGroupResultNull) { + avg = ((double) sum) / nonNullGroupCount; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupResult() { + return avg; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + sum = 0; + nonNullGroupCount = 0; + avg = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java new file mode 100644 index 0000000..96833ba --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long first_value() for a PTF group. + * + * We capture the first value from the first batch. It can be NULL. 
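[Reviewer note] Note the cast in the long avg finalization above: ((double) sum) / nonNullGroupCount. Without it, Java would perform integer division and silently truncate. A two-line demonstration:

    public class LongAvgCastDemo {
      public static void main(String[] args) {
        long sum = 7;
        int nonNullGroupCount = 2;
        System.out.println(sum / nonNullGroupCount);             // 3   (integer division)
        System.out.println(((double) sum) / nonNullGroupCount);  // 3.5 (what the evaluator computes)
      }
    }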
+ * We then set (stream) the output column with that value as repeated in each batch. + */ +public class VectorPTFEvaluatorLongFirstValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongFirstValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean haveFirstValue; + protected boolean isGroupResultNull; + protected long firstValue; + + public VectorPTFEvaluatorLongFirstValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // First row determines isGroupResultNull and long firstValue; stream fill result as repeated. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!haveFirstValue) { + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + firstValue = longColVector.vector[0]; + isGroupResultNull = false; + } + } else if (longColVector.noNulls) { + firstValue = longColVector.vector[0]; + isGroupResultNull = false; + } else { + if (!longColVector.isNull[0]) { + firstValue = longColVector.vector[0]; + isGroupResultNull = false; + } + } + haveFirstValue = true; + } + + // First value is repeated for all batches. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + outputColVector.isRepeating = true; + if (isGroupResultNull) { + outputColVector.noNulls = false; + outputColVector.isNull[0] = true; + } else { + outputColVector.noNulls = true; + outputColVector.isNull[0] = false; + outputColVector.vector[0] = firstValue; + } + } + + public boolean streamsResult() { + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + haveFirstValue = false; + isGroupResultNull = true; + firstValue = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java new file mode 100644 index 0000000..bd9ebe2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long last_value() for a PTF group. + * + * We capture the last value from the last batch. It can be NULL. + * It becomes the group value. + */ +public class VectorPTFEvaluatorLongLastValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongLastValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected long lastValue; + + public VectorPTFEvaluatorLongLastValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Last row of last batch determines isGroupResultNull and long lastValue. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!isLastGroupBatch) { + return; + } + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + lastValue = longColVector.vector[0]; + isGroupResultNull = false; + } + } else if (longColVector.noNulls) { + lastValue = longColVector.vector[size - 1]; + isGroupResultNull = false; + } else { + final int lastBatchIndex = size - 1; + if (!longColVector.isNull[lastBatchIndex]) { + lastValue = longColVector.vector[lastBatchIndex]; + isGroupResultNull = false; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupResult() { + return lastValue; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + lastValue = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java new file mode 100644 index 0000000..6e6e739 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long max() for a PTF group. + */ +public class VectorPTFEvaluatorLongMax extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongMax.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected long max; + + public VectorPTFEvaluatorLongMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Determine maximum of all non-null long column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + if (isGroupResultNull) { + max = longColVector.vector[0]; + isGroupResultNull = false; + } else { + final long repeatedMax = longColVector.vector[0]; + if (repeatedMax > max) { + max = repeatedMax; + } + } + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varMax = vector[0]; + for (int i = 1; i < size; i++) { + final long l = vector[i]; + if (l > varMax) { + varMax = l; + } + } + if (isGroupResultNull) { + max = varMax; + isGroupResultNull = false; + } else if (varMax > max) { + max = varMax; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varMax = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long l = vector[i]; + if (l > varMax) { + varMax = l; + } + } + } + if (isGroupResultNull) { + max = varMax; + isGroupResultNull = false; + } else if (varMax > max) { + max = varMax; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupResult() { + return max; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + max = Long.MIN_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java new file mode 100644 index 0000000..9045334 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long min() for a PTF group. 
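[Reviewer note] The max/min evaluators reset to a sentinel (Long.MIN_VALUE for max, Long.MAX_VALUE for min), but the sentinel can never leak into a result: the first accepted value overwrites it unconditionally under the isGroupResultNull guard, and an all-null group simply leaves isGroupResultNull true. A compact, illustrative model of that guard for the long max case:

    public class SentinelGuardSketch {

      static long max = Long.MIN_VALUE;   // sentinel from resetEvaluator()
      static boolean isGroupResultNull = true;

      static void accept(long value) {
        if (isGroupResultNull) {
          max = value;                    // first non-null value replaces the sentinel outright
          isGroupResultNull = false;
        } else if (value > max) {
          max = value;
        }
      }

      public static void main(String[] args) {
        System.out.println(isGroupResultNull ? "NULL" : Long.toString(max)); // NULL: no values seen
        accept(-5);
        accept(3);
        System.out.println(isGroupResultNull ? "NULL" : Long.toString(max)); // 3
      }
    }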
+ */ +public class VectorPTFEvaluatorLongMin extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongMin.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected long min; + + public VectorPTFEvaluatorLongMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Determine minimum of all non-null long column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + if (isGroupResultNull) { + min = longColVector.vector[0]; + isGroupResultNull = false; + } else { + final long repeatedMin = longColVector.vector[0]; + if (repeatedMin < min) { + min = repeatedMin; + } + } + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varMin = vector[0]; + for (int i = 1; i < size; i++) { + final long l = vector[i]; + if (l < varMin) { + varMin = l; + } + } + if (isGroupResultNull) { + min = varMin; + isGroupResultNull = false; + } else if (varMin < min) { + min = varMin; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varMin = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long l = vector[i]; + if (l < varMin) { + varMin = l; + } + } + } + if (isGroupResultNull) { + min = varMin; + isGroupResultNull = false; + } else if (varMin < min) { + min = varMin; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupResult() { + return min; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + min = Long.MAX_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java new file mode 100644 index 0000000..24be1c0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long sum() for a PTF group. + */ +public class VectorPTFEvaluatorLongSum extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongSum.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isGroupResultNull; + protected long sum; + + public VectorPTFEvaluatorLongSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + // Sum all non-null long column values; maintain isGroupResultNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + + if (longColVector.noNulls) { + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = longColVector.vector[0] * batch.size; + isGroupResultNull = false; + } else { + sum += longColVector.vector[0] * batch.size; + } + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varSum = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + } + } + if (isGroupResultNull) { + + // First aggregation calculation for group. + sum = varSum; + isGroupResultNull = false; + } else { + sum += varSum; + } + } + } + + @Override + public boolean isGroupResultNull() { + return isGroupResultNull; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupResult() { + return sum; + } + + @Override + public void resetEvaluator() { + isGroupResultNull = true; + sum = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java new file mode 100644 index 0000000..5b48e2f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
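[Reviewer note] The sum evaluators above keep a batch-local varSum and merge it into the group state once per batch, so the hot loop carries no isGroupResultNull branch. A standalone sketch of that shape, simplified to omit the repeating and null branches:

    public class PartialSumSketch {

      static long groupSum;
      static boolean isGroupResultNull = true;

      static void addBatch(long[] vector, int size) {
        if (size == 0) {
          return;
        }
        // Accumulate locally; the hot loop has no group-state branches.
        long varSum = vector[0];
        for (int i = 1; i < size; i++) {
          varSum += vector[i];
        }
        // Merge into the group result exactly once per batch.
        if (isGroupResultNull) {
          groupSum = varSum;
          isGroupResultNull = false;
        } else {
          groupSum += varSum;
        }
      }

      public static void main(String[] args) {
        addBatch(new long[] { 1, 2, 3 }, 3);
        addBatch(new long[] { 10 }, 1);
        System.out.println(groupSum);   // 16
      }
    }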
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +/** + * This class evaluates rank() for a PTF group. + * + * Rank starts at 1; the same rank is streamed to the output column as repeated; after the last + * group row, the rank is increased by the number of group rows. + */ +public class VectorPTFEvaluatorRank extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorRank.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private int rank; + private int groupCount; + + public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; + longColVector.isRepeating = true; + longColVector.noNulls = true; + longColVector.isNull[0] = false; + longColVector.vector[0] = rank; + groupCount += batch.size; + + if (isLastGroupBatch) { + rank += groupCount; + groupCount = 0; + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + rank = 1; + groupCount = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java new file mode 100644 index 0000000..b99fe66 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
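[Reviewer note] rank() and dense_rank() share the same streaming structure — one repeated output value per duplicate-key group — and differ only in how they advance: rank jumps by the number of tied rows it accumulated in groupCount, while dense_rank always steps by one. A scalar walk-through, where each call models one single-batch group of tied rows (hypothetical driver, not Hive code):

    public class RankVsDenseRankSketch {

      static int rank = 1, denseRank = 1, groupCount;

      // Mirrors the isLastGroupBatch bookkeeping of the two evaluators.
      static void group(int tiedRows) {
        System.out.println("rows=" + tiedRows + " rank=" + rank + " dense_rank=" + denseRank);
        groupCount += tiedRows;
        rank += groupCount;        // jump past all tied rows
        groupCount = 0;
        denseRank++;               // always advance by one
      }

      public static void main(String[] args) {
        group(1);   // rank=1 dense_rank=1
        group(3);   // rank=2 dense_rank=2  (three tied rows)
        group(1);   // rank=5 dense_rank=3  (rank skipped 3 and 4)
      }
    }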
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +/** + * This class evaluates row_number() for a PTF group. + * + * Row number starts at 1; stream row number to output column for each row and increment. + */ +public class VectorPTFEvaluatorRowNumber extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorRowNumber.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private int rowNumber; + + public VectorPTFEvaluatorRowNumber(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { + evaluateInputExpr(batch); + + final int size = batch.size; + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + vector[i] = rowNumber++; + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + public boolean isGroupResultNull() { + return false; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + rowNumber = 1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java new file mode 100644 index 0000000..a843f48 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class encapsulates one or more VectorizedRowBatches of a PTF group.
+ */
+public class VectorPTFGroupBatches {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorPTFGroupBatches.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  private VectorPTFEvaluatorBase[] evaluators;
+  private int[] outputColumnMap;
+  private int[] keyInputColumnMap;
+  private int[] bufferedColumnMap;
+
+  private ArrayList<VectorizedRowBatch> bufferedBatches;
+
+  private VectorizedRowBatch overflowBatch;
+
+  private int allocatedBufferedBatchCount;
+  private int currentBufferedBatchCount;
+
+  public VectorPTFGroupBatches() {
+    allocatedBufferedBatchCount = 0;
+    currentBufferedBatchCount = 0;
+  }
+
+  public void init(VectorPTFEvaluatorBase[] evaluators, int[] outputColumnMap,
+      int[] keyInputColumnMap, int[] nonKeyInputColumnMap, int[] streamingColumnMap,
+      VectorizedRowBatch overflowBatch) {
+    this.evaluators = evaluators;
+    this.outputColumnMap = outputColumnMap;
+    this.keyInputColumnMap = keyInputColumnMap;
+    final int nonKeyInputColumnCount = nonKeyInputColumnMap.length;
+    final int streamingColumnCount = streamingColumnMap.length;
+    final int bufferedColumnCount = nonKeyInputColumnCount + streamingColumnCount;
+    bufferedColumnMap = new int[bufferedColumnCount];
+    for (int i = 0; i < nonKeyInputColumnCount; i++) {
+      bufferedColumnMap[i] = nonKeyInputColumnMap[i];
+    }
+    for (int i = nonKeyInputColumnCount; i < bufferedColumnCount; i++) {
+      bufferedColumnMap[i] = streamingColumnMap[i - nonKeyInputColumnCount];
+    }
+    this.overflowBatch = overflowBatch;
+    bufferedBatches = new ArrayList<VectorizedRowBatch>(0);
+  }
+
+  public void evaluateStreamingGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
+
+    // Streaming evaluators fill in their results during the evaluate call.
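+    // (For example, rank() and row_number() write a value for every row as the batch arrives,
+    // so there is no deferred group result and the batch can be forwarded immediately.)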
+    for (VectorPTFEvaluatorBase evaluator : evaluators) {
+      evaluator.evaluateGroupBatch(batch, isLastGroupBatch);
+    }
+  }
+
+  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
+    for (VectorPTFEvaluatorBase evaluator : evaluators) {
+      evaluator.evaluateGroupBatch(batch, isLastGroupBatch);
+    }
+  }
+
+  private void fillGroupResults(VectorizedRowBatch batch) {
+    for (VectorPTFEvaluatorBase evaluator : evaluators) {
+      final int outputColumnNum = evaluator.getOutputColumnNum();
+      if (evaluator.streamsResult()) {
+        continue;
+      }
+      final ColumnVector outputColVector = batch.cols[outputColumnNum];
+      outputColVector.isRepeating = true;
+      final boolean isGroupResultNull = evaluator.isGroupResultNull();
+      outputColVector.isNull[0] = isGroupResultNull;
+      if (isGroupResultNull) {
+        outputColVector.noNulls = false;
+      } else {
+        outputColVector.noNulls = true;
+        switch (evaluator.getResultColumnVectorType()) {
+        case LONG:
+          ((LongColumnVector) outputColVector).vector[0] = evaluator.getLongGroupResult();
+          break;
+        case DOUBLE:
+          ((DoubleColumnVector) outputColVector).vector[0] = evaluator.getDoubleGroupResult();
+          break;
+        case DECIMAL:
+          ((DecimalColumnVector) outputColVector).vector[0].set(evaluator.getDecimalGroupResult());
+          break;
+        default:
+          throw new RuntimeException(
+              "Unexpected column vector type " + evaluator.getResultColumnVectorType());
+        }
+      }
+    }
+  }
+
+  private void forwardBufferedBatches(VectorPTFOperator vecPTFOperator, int index)
+      throws HiveException {
+    VectorizedRowBatch bufferedBatch = bufferedBatches.get(index);
+
+    final int size = bufferedColumnMap.length;
+    for (int i = 0; i < size; i++) {
+
+      // Swap ColumnVectors with overflowBatch. We remember buffered columns compactly in the
+      // buffered VRBs without other columns or scratch columns.
+      VectorizedBatchUtil.swapColumnVector(
+          bufferedBatch, i, overflowBatch, bufferedColumnMap[i]);
+    }
+
+    // Forward once, after all buffered columns have been swapped in.
+    overflowBatch.size = bufferedBatch.size;
+    fillGroupResults(overflowBatch);
+    vecPTFOperator.forward(overflowBatch, null);
+  }
+
+  public void fillGroupResultsAndForward(VectorPTFOperator vecPTFOperator,
+      VectorizedRowBatch lastBatch) throws HiveException {
+
+    if (currentBufferedBatchCount > 0) {
+
+      // Set partition and order columns in overflowBatch.
+      // We can set by ref since our last batch is held by us.
+      final int keyInputColumnCount = keyInputColumnMap.length;
+      for (int i = 0; i < keyInputColumnCount; i++) {
+        VectorizedBatchUtil.copyRepeatingColumn(lastBatch, i, overflowBatch, i, /* setByValue */ false);
+      }
+
+      for (int i = 0; i < currentBufferedBatchCount; i++) {
+        forwardBufferedBatches(vecPTFOperator, i);
+      }
+      currentBufferedBatchCount = 0;
+    }
+
+    fillGroupResults(lastBatch);
+
+    // Save the original projection.
+    int[] originalProjections = lastBatch.projectedColumns;
+    int originalProjectionSize = lastBatch.projectionSize;
+
+    // Project with the output of our operator.
+    lastBatch.projectionSize = outputColumnMap.length;
+    lastBatch.projectedColumns = outputColumnMap;
+
+    vecPTFOperator.forward(lastBatch, null);
+
+    // Revert the projected columns, because the batch can be re-used by our parent operators.
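+    // (The projection arrays above were installed by reference, so the saved values must be
+    // restored before the batch is handed back upstream.)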
+ lastBatch.projectionSize = originalProjectionSize; + lastBatch.projectedColumns = originalProjections; + + } + + public void resetEvaluators() { + for (VectorPTFEvaluatorBase evaluator : evaluators) { + evaluator.resetEvaluator(); + } + } + + private VectorizedRowBatch newBufferedBatch(VectorizedRowBatch batch) throws HiveException { + final int bufferedColumnCount = bufferedColumnMap.length; + VectorizedRowBatch newBatch = new VectorizedRowBatch(bufferedColumnCount); + for (int i = 0; i < bufferedColumnCount; i++) { + newBatch.cols[i] = + VectorizedBatchUtil.makeLikeColumnVector(batch.cols[bufferedColumnMap[i]]); + newBatch.cols[i].init(); + } + return newBatch; + } + + public void bufferGroupBatch(VectorizedRowBatch batch) throws HiveException { + + final int bufferedColumnCount = bufferedColumnMap.length; + if (allocatedBufferedBatchCount <= currentBufferedBatchCount) { + VectorizedRowBatch newBatch = newBufferedBatch(batch); + bufferedBatches.add(newBatch); + allocatedBufferedBatchCount++; + } + + VectorizedRowBatch bufferedBatch = bufferedBatches.get(currentBufferedBatchCount++); + + for (int i = 0; i < bufferedColumnCount; i++) { + VectorizedBatchUtil.swapColumnVector( + batch, bufferedColumnMap[i], bufferedBatch, i); + } + + bufferedBatch.size = batch.size; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java new file mode 100644 index 0000000..7522624 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -0,0 +1,570 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import java.io.IOException;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PTFDesc;
+import org.apache.hadoop.hive.ql.plan.VectorPTFDesc;
+import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.ql.plan.VectorPTFInfo;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * This is the native vectorized PTF operator class.
+ */
+public class VectorPTFOperator extends Operator<PTFDesc>
+    implements VectorizationContextRegion {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorPTFOperator.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  private VectorPTFDesc vectorDesc;
+
+  /**
+   * Information about our native vectorized PTF created by the Vectorizer class during
+   * its decision process and useful for execution.
+   */
+  private VectorPTFInfo vectorPTFInfo;
+
+  private VectorizationContext vContext;
+
+  // This is the vectorized row batch description of the output of the native vectorized PTF
+  // operator. It is based on the incoming vectorization context.
+  // Its projection may include a mixture of input columns and new scratch columns (for the
+  // aggregation output).
+  protected VectorizationContext vOutContext;
+
+  private boolean isPartitionOrderBy;
+
+  /**
+   * PTF vector expressions.
+   */
+
+  // This is a map of which vectorized row batch columns are the input columns and which are
+  // the group value (aggregation) output columns, along with their types.
+  private int[] outputColumnMap;
+  private String[] outputColumnNames;
+  private TypeInfo[] outputTypeInfos;
+
+  private int evaluatorCount;
+  private String[] evaluatorFunctionNames;
+  private WindowFrameDef[] evaluatorWindowFrameDefs;
+  private VectorExpression[] evaluatorInputExpressions;
+  private Type[] evaluatorInputColumnVectorTypes;
+
+  private ExprNodeDesc[] orderExprNodeDescs;
+  private int[] orderColumnMap;
+  private Type[] orderColumnVectorTypes;
+  private VectorExpression[] orderExpressions;
+
+  private ExprNodeDesc[] partitionExprNodeDescs;
+  private int[] partitionColumnMap;
+  private Type[] partitionColumnVectorTypes;
+  private VectorExpression[] partitionExpressions;
+
+  private int[] keyInputColumnMap;
+  private int[] nonKeyInputColumnMap;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient boolean isLastGroupBatch;
+
+  private transient VectorizedRowBatch overflowBatch;
+
+  private transient VectorPTFGroupBatches groupBatches;
+
+  private transient VectorPTFEvaluatorBase[] evaluators;
+
+  private transient int[] streamingColumnMap;
+
+  private transient boolean allEvaluatorsAreStreaming;
+
+  private transient boolean isFirstPartition;
+
+  private transient boolean[] currentPartitionIsNull;
+  private transient long[] currentPartitionLongs;
+  private transient double[] currentPartitionDoubles;
+  private transient byte[][] currentPartitionByteArrays;
+  private transient int[] currentPartitionByteLengths;
+  private transient HiveDecimalWritable[] currentPartitionDecimals;
+  private transient Timestamp[] currentPartitionTimestamps;
+  private transient HiveIntervalDayTime[] currentPartitionIntervalDayTimes;
+
+  // For debug tracing: the name of the map or reduce task.
+  private transient String taskName;
+
+  // Debug display.
+  private transient long batchCounter;
+
+  //---------------------------------------------------------------------------
+
+  /** Kryo ctor. */
+  protected VectorPTFOperator() {
+    super();
+  }
+
+  public VectorPTFOperator(CompilationOpContext ctx) {
+    super(ctx);
+  }
+
+  public VectorPTFOperator(CompilationOpContext ctx,
+      VectorizationContext vContext, OperatorDesc conf) throws HiveException {
+    this(ctx);
+
+    LOG.info("VectorPTF constructor");
+
+    PTFDesc desc = (PTFDesc) conf;
+    this.conf = desc;
+    vectorDesc = (VectorPTFDesc) desc.getVectorDesc();
+    vectorPTFInfo = vectorDesc.getVectorPTFInfo();
+    this.vContext = vContext;
+
+    isPartitionOrderBy = vectorDesc.getIsPartitionOrderBy();
+
+    outputColumnNames = vectorDesc.getOutputColumnNames();
+    outputTypeInfos = vectorDesc.getOutputTypeInfos();
+    outputColumnMap = vectorPTFInfo.getOutputColumnMap();
+
+    /*
+     * Create a new vectorization context to create a new projection. The output column manager
+     * of the incoming context must be inherited so that the scratch columns stay tracked.
+     */
+    vOutContext = new VectorizationContext(getName(), this.vContext);
+    setupVOutContext();
+
+    evaluatorFunctionNames = vectorDesc.getEvaluatorFunctionNames();
+    evaluatorCount = evaluatorFunctionNames.length;
+    evaluatorWindowFrameDefs = vectorDesc.getEvaluatorWindowFrameDefs();
+    evaluatorInputExpressions = vectorPTFInfo.getEvaluatorInputExpressions();
+    evaluatorInputColumnVectorTypes = vectorPTFInfo.getEvaluatorInputColumnVectorTypes();
+
+    orderExprNodeDescs = vectorDesc.getOrderExprNodeDescs();
+    orderColumnMap = vectorPTFInfo.getOrderColumnMap();
+    orderColumnVectorTypes = vectorPTFInfo.getOrderColumnVectorTypes();
+    orderExpressions = vectorPTFInfo.getOrderExpressions();
+
+    partitionExprNodeDescs = vectorDesc.getPartitionExprNodeDescs();
+    partitionColumnMap = vectorPTFInfo.getPartitionColumnMap();
+    partitionColumnVectorTypes = vectorPTFInfo.getPartitionColumnVectorTypes();
+    partitionExpressions = vectorPTFInfo.getPartitionExpressions();
+
+    keyInputColumnMap = vectorPTFInfo.getKeyInputColumnMap();
+    nonKeyInputColumnMap = vectorPTFInfo.getNonKeyInputColumnMap();
+  }
+
+  /**
+   * Set up the vectorized row batch description of the output of the native vectorized PTF
+   * operator. Use the output projection we previously built from a mixture of input
+   * columns and new scratch columns.
+   */
+  protected void setupVOutContext() {
+    vOutContext.resetProjectionColumns();
+    final int count = outputColumnNames.length;
+    for (int i = 0; i < count; ++i) {
+      String columnName = outputColumnNames[i];
+      int outputColumn = outputColumnMap[i];
+      vOutContext.addProjectionColumn(columnName, outputColumn);
+    }
+  }
+
+  /*
+   * Allocate overflow batch columns by hand.
+   */
+  private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, int outputColumn,
+      String typeName) throws HiveException {
+
+    if (overflowBatch.cols[outputColumn] == null) {
+      typeName = VectorizationContext.mapTypeNameSynonyms(typeName);
+
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+
+      overflowBatch.cols[outputColumn] = VectorizedBatchUtil.createColumnVector(typeInfo);
+    }
+  }
+
+  /*
+   * Set up our second batch with the same "column schema" as the output columns plus any scratch
+   * columns, since the overflow batch will get forwarded to children operators.
+   */
+  protected VectorizedRowBatch setupOverflowBatch() throws HiveException {
+
+    int initialColumnCount = vContext.firstOutputColumnIndex();
+    VectorizedRowBatch overflowBatch;
+
+    int totalNumColumns = initialColumnCount + vOutContext.getScratchColumnTypeNames().length;
+    overflowBatch = new VectorizedRowBatch(totalNumColumns);
+
+    // First, allocate just the output columns we will be using.
+    for (int i = 0; i < outputColumnMap.length; i++) {
+      int outputColumn = outputColumnMap[i];
+      String typeName = outputTypeInfos[i].getTypeName();
+      allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName);
+    }
+
+    // Now, add any scratch columns needed for children operators.
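+    // (Illustration: with firstOutputColumnIndex() == 5 and two scratch types, columns 0..4
+    // follow the input schema and columns 5..6 are scratch; the overflow batch mirrors this
+    // layout so it can be forwarded exactly like the regular batch.)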
+ int outputColumn = initialColumnCount; + for (String typeName : vOutContext.getScratchColumnTypeNames()) { + allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName); + } + + overflowBatch.projectedColumns = outputColumnMap; + overflowBatch.projectionSize = outputColumnMap.length; + + overflowBatch.reset(); + + return overflowBatch; + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + if (LOG.isDebugEnabled()) { + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + taskName = work.getName(); + } + + if (!isPartitionOrderBy) { + currentPartitionIsNull = null; + currentPartitionLongs = null; + currentPartitionDoubles = null; + currentPartitionByteArrays = null; + currentPartitionByteLengths = null; + currentPartitionDecimals = null; + currentPartitionTimestamps = null; + currentPartitionIntervalDayTimes = null; + } else { + final int partitionKeyCount = vectorDesc.getPartitionExprNodeDescs().length; + currentPartitionIsNull = new boolean[partitionKeyCount]; + currentPartitionLongs = new long[partitionKeyCount]; + currentPartitionDoubles = new double[partitionKeyCount]; + currentPartitionByteArrays = new byte[partitionKeyCount][]; + currentPartitionByteLengths = new int[partitionKeyCount]; + currentPartitionDecimals = new HiveDecimalWritable[partitionKeyCount]; + currentPartitionTimestamps = new Timestamp[partitionKeyCount]; + currentPartitionIntervalDayTimes = new HiveIntervalDayTime[partitionKeyCount]; + } + + evaluators = VectorPTFDesc.getEvaluators(vectorDesc, vectorPTFInfo); + + streamingColumnMap = VectorPTFDesc.getStreamingColumnMap(evaluators); + + allEvaluatorsAreStreaming = (streamingColumnMap.length == evaluatorCount); + + /* + * Setup the overflow batch. + */ + overflowBatch = setupOverflowBatch(); + + groupBatches = new VectorPTFGroupBatches(); + groupBatches.init( + evaluators, outputColumnMap, keyInputColumnMap, nonKeyInputColumnMap, streamingColumnMap, overflowBatch); + + isFirstPartition = true; + + batchCounter = 0; + } + + @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + this.isLastGroupBatch = isLastGroupBatch; + } + + /** + * We are processing a batch from reduce processor that is only for one reducer key or PTF group. + * + * For a simple OVER (PARTITION BY column) or OVER (ORDER BY column), the reduce processor's + * group key is the partition or order by key. + * + * For an OVER (PARTITION BY column1, ORDER BY column2), the reduce-shuffle group key is + * the combination of the partition column1 and the order by column2. In this case, this method + * has to watch for changes in the partition and reset the group aggregations. + * + * The reduce processor calls setNextVectorBatchGroupStatus beforehand to tell us whether the + * batch supplied to our process method is the last batch for the group key, or not. This helps + * us intelligently process the batch. 
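+   *
+   * For example, for OVER (PARTITION BY p ORDER BY o) the shuffle group key is (p, o), so a new
+   * group key does not by itself mean a new partition; isPartitionChanged below compares the
+   * remembered partition values with the incoming batch to find true partition boundaries.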
+ */ + @Override + public void process(Object row, int tag) throws HiveException { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + for (VectorExpression orderExpression : orderExpressions) { + orderExpression.evaluate(batch); + } + + if (partitionExpressions != null) { + for (VectorExpression partitionExpression : partitionExpressions) { + partitionExpression.evaluate(batch); + } + } + + if (isPartitionOrderBy) { + + // Check for PARTITION BY key change when we have ORDER BY keys. + if (isFirstPartition) { + isFirstPartition = false; + setCurrentPartition(batch); + } else if (isPartitionChanged(batch)) { + setCurrentPartition(batch); + groupBatches.resetEvaluators(); + } + } + + if (allEvaluatorsAreStreaming) { + + // We can process this batch immediately. + groupBatches.evaluateStreamingGroupBatch(batch, isLastGroupBatch); + forward(batch, null); + + } else { + + // Evaluate the aggregation functions over the group batch. + groupBatches.evaluateGroupBatch(batch, isLastGroupBatch); + + if (!isLastGroupBatch) { + + // The group spans a VectorizedRowBatch. Swap the relevant columns into our batch buffers, + // or write the batch to temporary storage. + groupBatches.bufferGroupBatch(batch); + return; + } + + /* + * Last group batch. + * + * Take the (non-streaming) group aggregation values and write output columns for all + * rows of every batch of the group. As each group batch is finished being written, they are + * forwarded to the next operator. + */ + groupBatches.fillGroupResultsAndForward(this, batch); + } + + // If we are only processing a PARTITION BY, reset our evaluators. + if (!isPartitionOrderBy) { + groupBatches.resetEvaluators(); + } + } + + private boolean isPartitionChanged(VectorizedRowBatch batch) { + + final int count = partitionColumnMap.length; + for (int i = 0; i < count; i++) { + ColumnVector colVector = batch.cols[partitionColumnMap[i]]; + + // Vector reduce key (i.e. partition) columns are repeated -- so we test element 0. 
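+      // (Reduce-shuffle delivers one group key per batch, so each key column arrives with
+      // isRepeating == true and element 0 represents every row of the batch.)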
+ + final boolean isNull = !colVector.noNulls && colVector.isNull[0]; + final boolean currentIsNull = currentPartitionIsNull[i]; + + if (isNull != currentIsNull) { + return true; + } + if (isNull) { + continue; + } + + switch (partitionColumnVectorTypes[i]) { + case LONG: + if (currentPartitionLongs[i] != ((LongColumnVector) colVector).vector[0]) { + return true; + } + break; + case DOUBLE: + if (currentPartitionDoubles[i] != ((DoubleColumnVector) colVector).vector[0]) { + return true; + } + break; + case BYTES: + { + BytesColumnVector byteColVector = (BytesColumnVector) colVector; + byte[] bytes = byteColVector.vector[0]; + final int start = byteColVector.start[0]; + final int length = byteColVector.length[0]; + if (!StringExpr.equal( + bytes, start, length, + currentPartitionByteArrays[i], 0, currentPartitionByteLengths[i])) { + return true; + } + } + break; + case DECIMAL: + if (!currentPartitionDecimals[i].equals(((DecimalColumnVector) colVector).vector[0])) { + return true; + } + break; + case TIMESTAMP: + if (((TimestampColumnVector) colVector).compareTo(0, currentPartitionTimestamps[i]) != 0) { + return true; + } + break; + case INTERVAL_DAY_TIME: + if (((IntervalDayTimeColumnVector) colVector).compareTo(0, currentPartitionIntervalDayTimes[i]) != 0) { + return true; + } + break; + default: + throw new RuntimeException("Unexpected column vector type " + partitionColumnVectorTypes[i]); + } + } + return false; + } + + private void setCurrentPartition(VectorizedRowBatch batch) { + + final int count = partitionColumnMap.length; + for (int i = 0; i < count; i++) { + ColumnVector colVector = batch.cols[partitionColumnMap[i]]; + + // Partition columns are repeated -- so we test element 0. + + final boolean isNull = !colVector.noNulls && colVector.isNull[0]; + currentPartitionIsNull[i] = isNull; + + if (isNull) { + continue; + } + + switch (partitionColumnVectorTypes[i]) { + case LONG: + currentPartitionLongs[i] = ((LongColumnVector) colVector).vector[0]; + break; + case DOUBLE: + currentPartitionDoubles[i] = ((DoubleColumnVector) colVector).vector[0]; + break; + case BYTES: + { + BytesColumnVector byteColVector = (BytesColumnVector) colVector; + byte[] bytes = byteColVector.vector[0]; + final int start = byteColVector.start[0]; + final int length = byteColVector.length[0]; + if (currentPartitionByteArrays[i] == null || currentPartitionByteLengths[i] < length) { + currentPartitionByteArrays[i] = Arrays.copyOfRange(bytes, start, start + length); + } else { + System.arraycopy(bytes, start, currentPartitionByteArrays[i], 0, length); + } + currentPartitionByteLengths[i] = length; + } + break; + case DECIMAL: + if (currentPartitionDecimals[i] == null) { + currentPartitionDecimals[i] = new HiveDecimalWritable(); + } + currentPartitionDecimals[i].set(((DecimalColumnVector) colVector).vector[0]); + break; + case TIMESTAMP: + if (currentPartitionTimestamps[i] == null) { + currentPartitionTimestamps[i] = new Timestamp(0); + } + ((TimestampColumnVector) colVector).timestampUpdate(currentPartitionTimestamps[i], 0); + break; + case INTERVAL_DAY_TIME: + if (currentPartitionIntervalDayTimes[i] == null) { + currentPartitionIntervalDayTimes[i] = new HiveIntervalDayTime(); + } + ((IntervalDayTimeColumnVector) colVector).intervalDayTimeUpdate(currentPartitionIntervalDayTimes[i], 0); + break; + default: + throw new RuntimeException("Unexpected column vector type " + partitionColumnVectorTypes[i]); + } + } + } + + @Override + public void forward(Object row, ObjectInspector rowInspector) throws HiveException { + 
super.forward(row, rowInspector); + } + + @Override + protected void closeOp(boolean abort) throws HiveException { + super.closeOp(abort); + + // We do not try to finish and flush an in-progress group because correct values require the + // last group batch. + } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "PTF"; + } + + @Override + public OperatorType getType() { + return OperatorType.PTF; + } + + @Override + public VectorizationContext getOuputVectorizationContext() { + return vOutContext; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f0df2e9..efb5402 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -61,14 +61,24 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDenseRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; @@ -95,6 +105,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; @@ -102,6 +113,7 @@ import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import 
org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; @@ -112,10 +124,14 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; +import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; +import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; @@ -149,6 +165,13 @@ import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef; +import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; +import org.apache.hadoop.hive.ql.plan.ptf.PartitionDef; +import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAsin; import org.apache.hadoop.hive.ql.udf.UDFAtan; @@ -190,6 +213,8 @@ import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; +import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator; +import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.NullStructSerDe; @@ -1659,7 +1684,7 @@ private ValidatorVectorizationContext(HiveConf hiveConf) { } @Override - protected int getInputColumnIndex(String name) { + public int getInputColumnIndex(String name) { return 0; } @@ -1818,6 +1843,9 @@ boolean validateReduceWorkOperator(Operator op) { ret = op instanceof SparkHashTableSinkOperator && validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op); break; + case PTF: + ret = validatePTFOperator((PTFOperator) op); + break; default: setOperatorNotSupported(op); ret = false; @@ -2073,6 +2101,131 @@ private boolean validateFileSinkOperator(FileSinkOperator op) { return true; } + /* + * Determine recursively if the PTF LEAD or LAG function is being used in an expression. 
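+   * LEAD and LAG read values from other rows of the window, which the per-batch vectorized
+   * evaluators here cannot supply, so their presence disqualifies the operator from vectorization.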
+ */ + private boolean containsLeadLag(ExprNodeDesc exprNodeDesc) { + if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genericFuncDesc = (ExprNodeGenericFuncDesc) exprNodeDesc; + GenericUDF genFuncClass = genericFuncDesc.getGenericUDF(); + if (genFuncClass instanceof GenericUDFLag || + genFuncClass instanceof GenericUDFLead) { + return true; + } + return containsLeadLag(genericFuncDesc.getChildren()); + } else { + // ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeDynamicValueDesc, etc do not have + // LEAD/LAG inside. + return false; + } + } + + private boolean containsLeadLag(List exprNodeDescList) { + for (ExprNodeDesc exprNodeDesc : exprNodeDescList) { + if (containsLeadLag(exprNodeDesc)) { + return true; + } + } + return false; + } + + private boolean validatePTFOperator(PTFOperator op) { + PTFDesc ptfDesc = (PTFDesc) op.getConf(); + boolean isMapSide = ptfDesc.isMapSide(); + if (isMapSide) { + setOperatorIssue("PTF Mapper not supported"); + return false; + } + boolean forNoop = ptfDesc.forNoop(); + if (forNoop) { + setOperatorIssue("NOOP not supported"); + return false; + } + boolean forWindowing = ptfDesc.forWindowing(); + if (!forWindowing) { + setOperatorIssue("Windowing required"); + return false; + } + PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); + boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef); + if (!isWindowTableFunctionDef) { + setOperatorIssue("Must be a WindowTableFunctionDef"); + return false; + } + + // We collect information in VectorPTFDesc that doesn't need the VectorizationContext. + // We use this information for validation. Later when creating the vector operator + // we create an additional object VectorPTFInfo. + + VectorPTFDesc vectorPTFDesc = null; + try { + vectorPTFDesc = createVectorPTFDesc(op, ptfDesc); + } catch (HiveException e) { + setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; + } + ptfDesc.setVectorDesc(vectorPTFDesc); + + // Output columns ok? 
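+    // Each output column's data type must be vectorizable in projection mode; a single
+    // unsupported type vetoes vectorization of the whole PTF operator.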
+ String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames(); + TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos(); + final int outputCount = outputColumnNames.length; + for (int i = 0; i < outputCount; i++) { + String typeName = outputTypeInfos[i].getTypeName(); + boolean ret = validateDataType(typeName, Mode.PROJECTION); + if (!ret) { + setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported"); + return false; + } + } + + boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy(); + String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames(); + final int count = evaluatorFunctionNames.length; + WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs(); + List[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists(); + + for (int i = 0; i < count; i++) { + String functionName = evaluatorFunctionNames[i]; + SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName); + if (supportedFunctionType == null) { + setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames); + return false; + } + WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i]; + if (!windowFrameDef.isStartUnbounded()) { + setOperatorIssue(functionName + " only UNBOUNDED start frame is supported"); + return false; + } + switch (windowFrameDef.getWindowType()) { + case RANGE: + if (!windowFrameDef.getEnd().isCurrentRow()) { + setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE"); + return false; + } + break; + case ROWS: + if (!windowFrameDef.isEndUnbounded()) { + setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type"); + return false; + } + break; + default: + throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType()); + } + List exprNodeDescList = evaluatorInputExprNodeDescLists[i]; + if (exprNodeDescList != null && exprNodeDescList.size() > 1) { + setOperatorIssue("More than 1 argument expression of aggregation function " + functionName); + return false; + } + if (exprNodeDescList != null && containsLeadLag(exprNodeDescList)) { + setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName); + return false; + } + } + return true; + } + private boolean validateExprNodeDesc(List descs, String expressionTitle) { return validateExprNodeDesc(descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); } @@ -3334,6 +3487,331 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { selectOp.getCompilationOpContext(), selectDesc, vContext, selectOp); } + private static void fillInPTFEvaluators( + List windowsFunctions, + String[] evaluatorFunctionNames, + WindowFrameDef[] evaluatorWindowFrameDefs, + List[] evaluatorInputExprNodeDescLists) throws HiveException { + final int functionCount = windowsFunctions.size(); + for (int i = 0; i < functionCount; i++) { + WindowFunctionDef winFunc = windowsFunctions.get(i); + evaluatorFunctionNames[i] = winFunc.getName(); + evaluatorWindowFrameDefs[i] = winFunc.getWindowFrame(); + + List args = winFunc.getArgs(); + if (args != null) { + + List exprNodeDescList = new ArrayList(); + for (PTFExpressionDef arg : args) { + exprNodeDescList.add(arg.getExprNode()); + } + + evaluatorInputExprNodeDescLists[i] = exprNodeDescList; + } + } + } + + private static ExprNodeDesc[] 
getPartitionExprNodeDescs(List partitionExpressions) { + final int size = partitionExpressions.size(); + ExprNodeDesc[] exprNodeDescs = new ExprNodeDesc[size]; + for (int i = 0; i < size; i++) { + exprNodeDescs[i] = partitionExpressions.get(i).getExprNode(); + } + return exprNodeDescs; + } + + private static ExprNodeDesc[] getOrderExprNodeDescs(List orderExpressions) { + final int size = orderExpressions.size(); + ExprNodeDesc[] exprNodeDescs = new ExprNodeDesc[size]; + for (int i = 0; i < size; i++) { + exprNodeDescs[i] = orderExpressions.get(i).getExprNode(); + } + return exprNodeDescs; + } + + /* + * Create the VectorPTFDesc data that is used during validation and that doesn't rely on + * VectorizationContext to lookup column names, etc. + */ + private static VectorPTFDesc createVectorPTFDesc(Operator ptfOp, + PTFDesc ptfDesc) throws HiveException { + + PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); + + WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef; + List windowsFunctions = windowTableFunctionDef.getWindowFunctions(); + final int functionCount = windowsFunctions.size(); + + ArrayList outputSignature = ptfOp.getSchema().getSignature(); + final int outputSize = outputSignature.size(); + + /* + * Output columns. + */ + String[] outputColumnNames = new String[outputSize]; + TypeInfo[] outputTypeInfos = new TypeInfo[outputSize]; + for (int i = 0; i < functionCount; i++) { + ColumnInfo colInfo = outputSignature.get(i); + TypeInfo typeInfo = colInfo.getType(); + outputColumnNames[i] = colInfo.getInternalName(); + outputTypeInfos[i] = typeInfo; + } + for (int i = functionCount; i < outputSize; i++) { + ColumnInfo colInfo = outputSignature.get(i); + outputColumnNames[i] = colInfo.getInternalName(); + outputTypeInfos[i] = colInfo.getType(); + } + + List partitionExpressions = funcDef.getPartition().getExpressions(); + final int partitionKeyCount = partitionExpressions.size(); + ExprNodeDesc[] partitionExprNodeDescs = getPartitionExprNodeDescs(partitionExpressions); + + List orderExpressions = funcDef.getOrder().getExpressions(); + final int orderKeyCount = orderExpressions.size(); + ExprNodeDesc[] orderExprNodeDescs = getOrderExprNodeDescs(orderExpressions); + + // When there are PARTITION and ORDER BY clauses, will have different partition expressions. + // Otherwise, only order by expressions. + boolean isPartitionOrderBy = false; + + if (partitionKeyCount != orderKeyCount) { + // Obviously different expressions. + isPartitionOrderBy = true; + } else { + // Check each ExprNodeDesc. 
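+      // (For instance, OVER (PARTITION BY a ORDER BY a) has pairwise-equal expressions, so
+      // isPartitionOrderBy stays false and the keys are treated as a single ordering key set,
+      // while OVER (PARTITION BY a ORDER BY b) sets isPartitionOrderBy.)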
+ for (int i = 0; i < partitionKeyCount; i++) { + final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper = + new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExprNodeDescs[i]); + final ExprNodeDescEqualityWrapper orderExprEqualityWrapper = + new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExprNodeDescs[i]); + if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) { + isPartitionOrderBy = true; + break; + } + } + } + + String[] evaluatorFunctionNames = new String[functionCount]; + WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount]; + List[] evaluatorInputExprNodeDescLists = (List[]) new List[functionCount]; + + fillInPTFEvaluators( + windowsFunctions, + evaluatorFunctionNames, + evaluatorWindowFrameDefs, + evaluatorInputExprNodeDescLists); + + VectorPTFDesc vectorPTFDesc = new VectorPTFDesc(); + + vectorPTFDesc.setIsPartitionOrderBy(isPartitionOrderBy); + + vectorPTFDesc.setOrderExprNodeDescs(orderExprNodeDescs); + vectorPTFDesc.setPartitionExprNodeDescs(partitionExprNodeDescs); + + vectorPTFDesc.setEvaluatorFunctionNames(evaluatorFunctionNames); + vectorPTFDesc.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs); + vectorPTFDesc.setEvaluatorInputExprNodeDescLists(evaluatorInputExprNodeDescLists); + + vectorPTFDesc.setOutputColumnNames(outputColumnNames); + vectorPTFDesc.setOutputTypeInfos(outputTypeInfos); + + return vectorPTFDesc; + } + + private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnMap, + boolean isPartitionOrderBy, int[] orderColumnMap, int[] partitionColumnMap, + int evaluatorCount, ArrayList keyInputColumns, + ArrayList nonKeyInputColumns) { + + final int outputSize = outputColumnMap.length; + final int orderKeyCount = orderColumnMap.length; + final int partitionKeyCount = (isPartitionOrderBy ? partitionColumnMap.length : 0); + for (int i = evaluatorCount; i < outputSize; i++) { + final int nonEvalColumnNum = outputColumnMap[i]; + boolean isKey = false; + for (int o = 0; o < orderKeyCount; o++) { + if (nonEvalColumnNum == orderColumnMap[o]) { + isKey = true; + break; + } + } + if (!isKey && isPartitionOrderBy) { + for (int p = 0; p < partitionKeyCount; p++) { + if (nonEvalColumnNum == partitionColumnMap[p]) { + isKey = true; + break; + } + } + } + if (isKey) { + keyInputColumns.add(nonEvalColumnNum); + } else { + nonKeyInputColumns.add(nonEvalColumnNum); + } + } + } + + /* + * Create the additional vectorization PTF information needed by the VectorPTFOperator during + * execution. + */ + private static VectorPTFInfo createVectorPTFInfo(Operator ptfOp, + PTFDesc ptfDesc, VectorizationContext vContext) throws HiveException { + + PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); + + ArrayList outputSignature = ptfOp.getSchema().getSignature(); + final int outputSize = outputSignature.size(); + + VectorPTFDesc vectorPTFDesc = (VectorPTFDesc) ptfDesc.getVectorDesc(); + + boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy(); + ExprNodeDesc[] orderExprNodeDescs = vectorPTFDesc.getOrderExprNodeDescs(); + ExprNodeDesc[] partitionExprNodeDescs = vectorPTFDesc.getPartitionExprNodeDescs(); + String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames(); + + final int evaluatorCount = evaluatorFunctionNames.length; + WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs(); + List[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists(); + + /* + * Output columns. 
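+     * The first evaluatorCount outputs are the aggregation results and receive newly allocated
+     * scratch columns; the remaining outputs map directly to input columns by name.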
+ */ + int[] outputColumnMap = new int[outputSize]; + for (int i = 0; i < evaluatorCount; i++) { + ColumnInfo colInfo = outputSignature.get(i); + TypeInfo typeInfo = colInfo.getType(); + final int outputColumnNum; + outputColumnNum = vContext.allocateScratchColumn(typeInfo); + outputColumnMap[i] = outputColumnNum; + } + for (int i = evaluatorCount; i < outputSize; i++) { + ColumnInfo colInfo = outputSignature.get(i); + outputColumnMap[i] = vContext.getInputColumnIndex(colInfo.getInternalName()); + } + + /* + * Partition and order by. + */ + + int[] partitionColumnMap; + Type[] partitionColumnVectorTypes; + VectorExpression[] partitionExpressions; + + if (!isPartitionOrderBy) { + partitionColumnMap = null; + partitionColumnVectorTypes = null; + partitionExpressions = null; + } else { + final int partitionKeyCount = partitionExprNodeDescs.length; + partitionColumnMap = new int[partitionKeyCount]; + partitionColumnVectorTypes = new Type[partitionKeyCount]; + partitionExpressions = new VectorExpression[partitionKeyCount]; + + for (int i = 0; i < partitionKeyCount; i++) { + VectorExpression partitionExpression = vContext.getVectorExpression(partitionExprNodeDescs[i]); + String typeName = partitionExpression.getOutputType(); + typeName = VectorizationContext.mapTypeNameSynonyms(typeName); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + partitionColumnVectorTypes[i] = columnVectorType; + partitionColumnMap[i] = partitionExpression.getOutputColumn(); + partitionExpressions[i] = partitionExpression; + } + } + + final int orderKeyCount = orderExprNodeDescs.length; + int[] orderColumnMap = new int[orderKeyCount]; + Type[] orderColumnVectorTypes = new Type[orderKeyCount]; + VectorExpression[] orderExpressions = new VectorExpression[orderKeyCount]; + for (int i = 0; i < orderKeyCount; i++) { + VectorExpression orderExpression = vContext.getVectorExpression(orderExprNodeDescs[i]); + String typeName = orderExpression.getOutputType(); + typeName = VectorizationContext.mapTypeNameSynonyms(typeName); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + orderColumnVectorTypes[i] = columnVectorType; + orderColumnMap[i] = orderExpression.getOutputColumn(); + orderExpressions[i] = orderExpression; + } + + ArrayList keyInputColumns = new ArrayList(); + ArrayList nonKeyInputColumns = new ArrayList(); + determineKeyAndNonKeyInputColumnMap(outputColumnMap, isPartitionOrderBy, orderColumnMap, + partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns); + int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0])); + int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0])); + + VectorExpression[] evaluatorInputExpressions = new VectorExpression[evaluatorCount]; + Type[] evaluatorInputColumnVectorTypes = new Type[evaluatorCount]; + for (int i = 0; i < evaluatorCount; i++) { + String functionName = evaluatorFunctionNames[i]; + WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i]; + SupportedFunctionType functionType = VectorPTFDesc.supportedFunctionsMap.get(functionName); + + List exprNodeDescList = evaluatorInputExprNodeDescLists[i]; + VectorExpression inputVectorExpression; + final Type columnVectorType; + if (exprNodeDescList != null) { + + // Validation has limited evaluatorInputExprNodeDescLists to 
size 1. + ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0); + + // Determine input vector expression using the VectorizationContext. + inputVectorExpression = vContext.getVectorExpression(exprNodeDesc); + + TypeInfo typeInfo = exprNodeDesc.getTypeInfo(); + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + } else { + inputVectorExpression = null; + columnVectorType = ColumnVector.Type.NONE; + } + + evaluatorInputExpressions[i] = inputVectorExpression; + evaluatorInputColumnVectorTypes[i] = columnVectorType; + } + + VectorPTFInfo vectorPTFInfo = new VectorPTFInfo(); + + vectorPTFInfo.setOutputColumnMap(outputColumnMap); + + vectorPTFInfo.setPartitionColumnMap(partitionColumnMap); + vectorPTFInfo.setPartitionColumnVectorTypes(partitionColumnVectorTypes); + vectorPTFInfo.setPartitionExpressions(partitionExpressions); + + vectorPTFInfo.setOrderColumnMap(orderColumnMap); + vectorPTFInfo.setOrderColumnVectorTypes(orderColumnVectorTypes); + vectorPTFInfo.setOrderExpressions(orderExpressions); + + vectorPTFInfo.setEvaluatorInputExpressions(evaluatorInputExpressions); + vectorPTFInfo.setEvaluatorInputColumnVectorTypes(evaluatorInputColumnVectorTypes); + + vectorPTFInfo.setKeyInputColumnMap(keyInputColumnMap); + vectorPTFInfo.setNonKeyInputColumnMap(nonKeyInputColumnMap); + + return vectorPTFInfo; + } + + /* + * NOTE: The VectorPTFDesc has already been allocated and populated. + */ + public static Operator vectorizePTFOperator( + Operator ptfOp, VectorizationContext vContext) + throws HiveException { + PTFDesc ptfDesc = (PTFDesc) ptfOp.getConf(); + + VectorPTFDesc vectorPTFDesc = (VectorPTFDesc) ptfDesc.getVectorDesc(); + + VectorPTFInfo vectorPTFInfo = createVectorPTFInfo(ptfOp, ptfDesc, vContext); + + vectorPTFDesc.setVectorPTFInfo(vectorPTFInfo); + + Class> opClass = VectorPTFOperator.class; + return OperatorFactory.getVectorOperator( + ptfOp.getCompilationOpContext(), ptfDesc, vContext, ptfOp); + } + public Operator vectorizeOperator(Operator op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException { @@ -3508,6 +3986,10 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { isNative = true; } break; + case PTF: + vectorOp = vectorizePTFOperator(op, vContext); + isNative = true; + break; case HASHTABLESINK: { SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java index c4b49b6..29a41a2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java @@ -20,16 +20,28 @@ import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase; import org.apache.hadoop.hive.ql.parse.LeadLagInfo; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import 
org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; import org.apache.hadoop.hive.ql.udf.ptf.Noop; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -114,4 +126,104 @@ public Configuration getCfg() { public void setCfg(Configuration cfg) { this.cfg = cfg; } + + // Since we don't have a non-native or pass-thru version of VectorPTFOperator, we do not + // have enableConditionsMet / enableConditionsNotMet like we have for VectorReduceSinkOperator, + // etc. + public class PTFOperatorExplainVectorization extends OperatorExplainVectorization { + + private final PTFDesc ptfDesc; + private final VectorPTFDesc vectorPTFDesc; + private final VectorPTFInfo vectorPTFInfo; + + private VectorizationCondition[] nativeConditions; + + public PTFOperatorExplainVectorization(PTFDesc ptfDesc, VectorDesc vectorDesc) { + // VectorPTFOperator is native vectorized. + super(vectorDesc, true); + this.ptfDesc = ptfDesc; + vectorPTFDesc = (VectorPTFDesc) vectorDesc; + vectorPTFInfo = vectorPTFDesc.getVectorPTFInfo(); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "functionNames", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getFunctionNames() { + return Arrays.toString(vectorPTFDesc.getEvaluatorFunctionNames()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "functionInputExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getFunctionInputExpressions() { + return Arrays.toString(vectorPTFInfo.getEvaluatorInputExpressions()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "partitionExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getPartitionExpressions() { + VectorExpression[] partitionExpressions = vectorPTFInfo.getPartitionExpressions(); + if (partitionExpressions == null) { + return null; + } + return Arrays.toString(partitionExpressions); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "orderExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getOrderExpressions() { + VectorExpression[] orderExpressions = vectorPTFInfo.getOrderExpressions(); + if (orderExpressions == null) { + return null; + } + return Arrays.toString(orderExpressions); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "evaluatorClasses", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getEvaluatorClasses() { + + VectorPTFEvaluatorBase[] evaluators = VectorPTFDesc.getEvaluators(vectorPTFDesc, vectorPTFInfo); + + ArrayList<String> result = new ArrayList<String>(evaluators.length); + for (VectorPTFEvaluatorBase evaluator : evaluators) { + result.add(evaluator.getClass().getSimpleName()); + } + return result.toString(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "outputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getOutputColumns() { + return Arrays.toString(vectorPTFInfo.getOutputColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "outputTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getOutputTypes() { + return Arrays.toString(vectorPTFDesc.getOutputTypeInfos()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "keyInputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getKeyInputColumns() { + return Arrays.toString(vectorPTFInfo.getKeyInputColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "nonKeyInputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getNonKeyInputColumns() { + return Arrays.toString(vectorPTFInfo.getNonKeyInputColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "streamingColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getStreamingColumns() { + VectorPTFEvaluatorBase[] evaluators = VectorPTFDesc.getEvaluators(vectorPTFDesc, vectorPTFInfo); + ArrayList<Integer> result = new ArrayList<Integer>(); + for (VectorPTFEvaluatorBase evaluator : evaluators) { + if (evaluator.streamsResult()) { + result.add(evaluator.getOutputColumnNum()); + } + } + return result.toString(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "PTF Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public PTFOperatorExplainVectorization getPTFVectorization() { + if (vectorDesc == null) { + return null; + } + return new PTFOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java new file mode 100644 index 0000000..a2f4cbc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java @@ -0,0 +1,360 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.TreeSet; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCount; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCountStar; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalFirstValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalLastValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDenseRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleFirstValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleLastValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongFirstValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongLastValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRowNumber; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * VectorPTFDesc. + * + * Extra parameters beyond PTFDesc just for the VectorPTFOperator. + * + * We don't extend PTFDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot of work for little gain.
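+ *
+ * For example (illustrative only): for
+ *
+ *   sum(p_retailprice) OVER (PARTITION BY p_mfgr ORDER BY p_name)
+ *
+ * the Vectorizer records the evaluator function name ("sum"), its window
+ * frame, and the partition/order ExprNodeDescs here; the derived vector
+ * expressions and column mappings are filled in later via VectorPTFInfo.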
+ */ +public class VectorPTFDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + public static enum SupportedFunctionType { + ROW_NUMBER, + RANK, + DENSE_RANK, + MIN, + MAX, + SUM, + AVG, + FIRST_VALUE, + LAST_VALUE, + COUNT + } + + public static HashMap<String, SupportedFunctionType> supportedFunctionsMap = + new HashMap<String, SupportedFunctionType>(); + static { + supportedFunctionsMap.put("row_number", SupportedFunctionType.ROW_NUMBER); + supportedFunctionsMap.put("rank", SupportedFunctionType.RANK); + supportedFunctionsMap.put("dense_rank", SupportedFunctionType.DENSE_RANK); + supportedFunctionsMap.put("min", SupportedFunctionType.MIN); + supportedFunctionsMap.put("max", SupportedFunctionType.MAX); + supportedFunctionsMap.put("sum", SupportedFunctionType.SUM); + supportedFunctionsMap.put("avg", SupportedFunctionType.AVG); + supportedFunctionsMap.put("first_value", SupportedFunctionType.FIRST_VALUE); + supportedFunctionsMap.put("last_value", SupportedFunctionType.LAST_VALUE); + supportedFunctionsMap.put("count", SupportedFunctionType.COUNT); + } + public static List<String> supportedFunctionNames = new ArrayList<String>(); + static { + TreeSet<String> treeSet = new TreeSet<String>(); + treeSet.addAll(supportedFunctionsMap.keySet()); + supportedFunctionNames.addAll(treeSet); + } + + private boolean isPartitionOrderBy; + + private String[] evaluatorFunctionNames; + private WindowFrameDef[] evaluatorWindowFrameDefs; + private List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists; + + private ExprNodeDesc[] orderExprNodeDescs; + private ExprNodeDesc[] partitionExprNodeDescs; + + private String[] outputColumnNames; + private TypeInfo[] outputTypeInfos; + + private VectorPTFInfo vectorPTFInfo; + + public VectorPTFDesc() { + isPartitionOrderBy = false; + + evaluatorFunctionNames = null; + evaluatorWindowFrameDefs = null; + evaluatorInputExprNodeDescLists = null; + + orderExprNodeDescs = null; + partitionExprNodeDescs = null; + + outputColumnNames = null; + outputTypeInfos = null; + } + + // We provide this public method to help EXPLAIN VECTORIZATION show the evaluator classes.
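+  //
+  // For example, supportedFunctionsMap.get("min") yields SupportedFunctionType.MIN,
+  // and together with a DECIMAL input column the switch below selects
+  // VectorPTFEvaluatorDecimalMin. A hypothetical caller (names illustrative):
+  //
+  //   SupportedFunctionType fn = supportedFunctionsMap.get("min");
+  //   VectorPTFEvaluatorBase eval =
+  //       getEvaluator(fn, windowFrameDef, Type.DECIMAL, inputExpr, outputColumnNum);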
+ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType functionType, + WindowFrameDef windowFrameDef, Type columnVectorType, VectorExpression inputVectorExpression, + int outputColumnNum) { + + VectorPTFEvaluatorBase evaluator; + switch (functionType) { + case ROW_NUMBER: + evaluator = new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case RANK: + evaluator = new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DENSE_RANK: + evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case MIN: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongMin(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case MAX: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongMax(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case SUM: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongSum(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case AVG: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case FIRST_VALUE: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case LAST_VALUE: + switch (columnVectorType) { + case LONG: + evaluator = new 
VectorPTFEvaluatorLongLastValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleLastValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalLastValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case COUNT: + if (inputVectorExpression == null) { + evaluator = new VectorPTFEvaluatorCountStar(windowFrameDef, inputVectorExpression, outputColumnNum); + } else { + evaluator = new VectorPTFEvaluatorCount(windowFrameDef, inputVectorExpression, outputColumnNum); + } + break; + default: + throw new RuntimeException("Unexpected function type " + functionType); + } + return evaluator; + } + + public static VectorPTFEvaluatorBase[] getEvaluators(VectorPTFDesc vectorPTFDesc, VectorPTFInfo vectorPTFInfo) { + String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames(); + int evaluatorCount = evaluatorFunctionNames.length; + WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs(); + VectorExpression[] evaluatorInputExpressions = vectorPTFInfo.getEvaluatorInputExpressions(); + Type[] evaluatorInputColumnVectorTypes = vectorPTFInfo.getEvaluatorInputColumnVectorTypes(); + + int[] outputColumnMap = vectorPTFInfo.getOutputColumnMap(); + + VectorPTFEvaluatorBase[] evaluators = new VectorPTFEvaluatorBase[evaluatorCount]; + for (int i = 0; i < evaluatorCount; i++) { + String functionName = evaluatorFunctionNames[i]; + WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i]; + SupportedFunctionType functionType = VectorPTFDesc.supportedFunctionsMap.get(functionName); + VectorExpression inputVectorExpression = evaluatorInputExpressions[i]; + final Type columnVectorType = evaluatorInputColumnVectorTypes[i]; + + // The output* arrays start at index 0 for output evaluator aggregations. 
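+      // That is, evaluator i writes its aggregation result to output column
+      // outputColumnMap[i]; entries past the evaluator count (if any) belong to
+      // the non-evaluator (pass-through) output columns.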
+ final int outputColumnNum = outputColumnMap[i]; + + VectorPTFEvaluatorBase evaluator = + VectorPTFDesc.getEvaluator( + functionType, windowFrameDef, columnVectorType, inputVectorExpression, outputColumnNum); + + evaluators[i] = evaluator; + } + return evaluators; + } + + public static int[] getStreamingColumnMap(VectorPTFEvaluatorBase[] evaluators) { + final int evaluatorCount = evaluators.length; + ArrayList<Integer> streamingColumns = new ArrayList<Integer>(); + for (int i = 0; i < evaluatorCount; i++) { + final VectorPTFEvaluatorBase evaluator = evaluators[i]; + if (evaluator.streamsResult()) { + streamingColumns.add(evaluator.getOutputColumnNum()); + } + } + return ArrayUtils.toPrimitive(streamingColumns.toArray(new Integer[0])); + } + + public boolean getIsPartitionOrderBy() { + return isPartitionOrderBy; + } + + public void setIsPartitionOrderBy(boolean isPartitionOrderBy) { + this.isPartitionOrderBy = isPartitionOrderBy; + } + + public String[] getEvaluatorFunctionNames() { + return evaluatorFunctionNames; + } + + public void setEvaluatorFunctionNames(String[] evaluatorFunctionNames) { + this.evaluatorFunctionNames = evaluatorFunctionNames; + } + + public WindowFrameDef[] getEvaluatorWindowFrameDefs() { + return evaluatorWindowFrameDefs; + } + + public void setEvaluatorWindowFrameDefs(WindowFrameDef[] evaluatorWindowFrameDefs) { + this.evaluatorWindowFrameDefs = evaluatorWindowFrameDefs; + } + + public List<ExprNodeDesc>[] getEvaluatorInputExprNodeDescLists() { + return evaluatorInputExprNodeDescLists; + } + + public void setEvaluatorInputExprNodeDescLists(List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists) { + this.evaluatorInputExprNodeDescLists = evaluatorInputExprNodeDescLists; + } + + public ExprNodeDesc[] getOrderExprNodeDescs() { + return orderExprNodeDescs; + } + + public void setOrderExprNodeDescs(ExprNodeDesc[] orderExprNodeDescs) { + this.orderExprNodeDescs = orderExprNodeDescs; + } + + public ExprNodeDesc[] getPartitionExprNodeDescs() { + return partitionExprNodeDescs; + } + + public void setPartitionExprNodeDescs(ExprNodeDesc[] partitionExprNodeDescs) { + this.partitionExprNodeDescs = partitionExprNodeDescs; + } + + public String[] getOutputColumnNames() { + return outputColumnNames; + } + + public void setOutputColumnNames(String[] outputColumnNames) { + this.outputColumnNames = outputColumnNames; + } + + public TypeInfo[] getOutputTypeInfos() { + return outputTypeInfos; + } + + public void setOutputTypeInfos(TypeInfo[] outputTypeInfos) { + this.outputTypeInfos = outputTypeInfos; + } + + public void setVectorPTFInfo(VectorPTFInfo vectorPTFInfo) { + this.vectorPTFInfo = vectorPTFInfo; + } + + public VectorPTFInfo getVectorPTFInfo() { + return vectorPTFInfo; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFInfo.java new file mode 100644 index 0000000..cf7f038 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFInfo.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * VectorPTFInfo. + * + * A convenience data structure that has the information needed to vectorize PTF. + * + * It is created by the Vectorizer when it is determining whether it can vectorize the operator, + * so the information doesn't have to be recreated again and again by the VectorPTFOperator's + * constructors and later during execution. + */ +public class VectorPTFInfo { + + private static final long serialVersionUID = 1L; + + private int[] outputColumnMap; + + private int[] orderColumnMap; + private Type[] orderColumnVectorTypes; + private VectorExpression[] orderExpressions; + + private int[] partitionColumnMap; + private Type[] partitionColumnVectorTypes; + private VectorExpression[] partitionExpressions; + + private VectorExpression[] evaluatorInputExpressions; + private Type[] evaluatorInputColumnVectorTypes; + + private int[] keyInputColumnMap; + private int[] nonKeyInputColumnMap; + + public VectorPTFInfo() { + + outputColumnMap = null; + + orderColumnMap = null; + orderColumnVectorTypes = null; + orderExpressions = null; + + partitionColumnMap = null; + partitionColumnVectorTypes = null; + partitionExpressions = null; + + evaluatorInputExpressions = null; + evaluatorInputColumnVectorTypes = null; + + keyInputColumnMap = null; + nonKeyInputColumnMap = null; + } + + public int[] getOutputColumnMap() { + return outputColumnMap; + } + + public void setOutputColumnMap(int[] outputColumnMap) { + this.outputColumnMap = outputColumnMap; + } + + public int[] getOrderColumnMap() { + return orderColumnMap; + } + + public void setOrderColumnMap(int[] orderColumnMap) { + this.orderColumnMap = orderColumnMap; + } + + public Type[] getOrderColumnVectorTypes() { + return orderColumnVectorTypes; + } + + public void setOrderColumnVectorTypes(Type[] orderColumnVectorTypes) { + this.orderColumnVectorTypes = orderColumnVectorTypes; + } + + public VectorExpression[] getOrderExpressions() { + return orderExpressions; + } + + public void setOrderExpressions(VectorExpression[] orderExpressions) { + this.orderExpressions = orderExpressions; + } + + public int[] getPartitionColumnMap() { + return partitionColumnMap; + } + + public void setPartitionColumnMap(int[] partitionColumnMap) { + this.partitionColumnMap = partitionColumnMap; + } + + public Type[] getPartitionColumnVectorTypes() { + return partitionColumnVectorTypes; + } + + public void setPartitionColumnVectorTypes(Type[] partitionColumnVectorTypes) { + this.partitionColumnVectorTypes = partitionColumnVectorTypes; + } + + public VectorExpression[] getPartitionExpressions() { + return partitionExpressions; + } + + public void setPartitionExpressions(VectorExpression[] partitionExpressions) { + this.partitionExpressions = partitionExpressions; + } + + public VectorExpression[] getEvaluatorInputExpressions() { + return
evaluatorInputExpressions; + } + + public void setEvaluatorInputExpressions(VectorExpression[] evaluatorInputExpressions) { + this.evaluatorInputExpressions = evaluatorInputExpressions; + } + + public Type[] getEvaluatorInputColumnVectorTypes() { + return evaluatorInputColumnVectorTypes; + } + + public void setEvaluatorInputColumnVectorTypes(Type[] evaluatorInputColumnVectorTypes) { + this.evaluatorInputColumnVectorTypes = evaluatorInputColumnVectorTypes; + } + + public int[] getKeyInputColumnMap() { + return keyInputColumnMap; + } + + public void setKeyInputColumnMap(int[] keyInputColumnMap) { + this.keyInputColumnMap = keyInputColumnMap; + } + + public int[] getNonKeyInputColumnMap() { + return nonKeyInputColumnMap; + } + + public void setNonKeyInputColumnMap(int[] nonKeyInputColumnMap) { + this.nonKeyInputColumnMap = nonKeyInputColumnMap; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java index 0af878b..346abe3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java @@ -78,6 +78,6 @@ public int getWindowSize() { @Override public String toString() { - return start + "~" + end; + return windowType + " " + start + "~" + end; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java index b3b36bc..bec0370 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLeadLag.java @@ -146,7 +146,9 @@ public void setAmt(int amt) { @Override public String getDisplayString(String[] children) { - assert (children.length == 2); + if (children.length != 2) { + return _getFnName() + "(...)"; + } return getStandardDisplayString(_getFnName(), children); } diff --git ql/src/test/queries/clientpositive/vector_outer_reference_windowed.q ql/src/test/queries/clientpositive/vector_outer_reference_windowed.q new file mode 100644 index 0000000..5b8404a --- /dev/null +++ ql/src/test/queries/clientpositive/vector_outer_reference_windowed.q @@ -0,0 +1,89 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +DROP TABLE IF EXISTS e011_01; +DROP TABLE IF EXISTS e011_02; +DROP TABLE IF EXISTS e011_03; + +CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE; + +CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)); + +CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)); + +LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01; + +INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01; + +INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01; + +ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS; +ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS; +ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS; + +set hive.explain.user=false; + +explain vectorization detail +select sum(sum(c1)) over() from e011_01; +select sum(sum(c1)) over() from e011_01; + +explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2; +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2; + +explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 
order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2; +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2; + +explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2; +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2; + +explain vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2; +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2; diff --git ql/src/test/queries/clientpositive/vector_windowing.q ql/src/test/queries/clientpositive/vector_windowing.q new file mode 100644 index 0000000..c042157 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing.q @@ -0,0 +1,790 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +set hive.mapred.mode=nonstrict; +set mapred.reduce.tasks=4; +-- SORT_QUERY_RESULTS + +-- 1. testWindowing +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; + +-- 2. testGroupByWithPartitioning +explain vectorization detail +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +; +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +; + +-- 3. 
testGroupByHavingWithSWQ +explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; + +-- 4. testCount +explain vectorization detail +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +; +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +; + +-- 5. testCountWithWindowingUDAF +explain vectorization detail +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +; +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +; + +-- 6. testCountInSubQ +explain vectorization detail +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1; +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1; + +-- 7. 
testJoinWithWindowingAndPTF +explain vectorization detail +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + +-- 8. testMixedCaseAlias +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +; + +-- 9. testHavingWithWindowingNoGBY +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; + +-- 10. testHavingWithWindowingCondRankNoGBY +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; + +-- 11. testFirstLast +explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 12. 
testFirstLastWithWhere +explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 13. testSumWindow +explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 14. testNoSortClause +explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 15. 
testExpressions +explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 16. testMultipleWindows +explain vectorization detail +select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 17. 
testCountStar +explain vectorization detail +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 18. testUDAFs +explain vectorization detail +select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 19. testUDAFsWithGBY +explain vectorization detail +select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 20. testSTATs +explain vectorization detail +select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 21. 
testDISTs +explain vectorization detail +select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 22. testViewAsTableInputWithWindowing +explain vectorization detail +create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand; +create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand; + +explain vectorization detail +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand; +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand; + +select p_mfgr, p_brand, s, +round(sum(s) over w1 ,2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row); + +-- 23. testCreateViewWithWindowingQuery +explain vectorization detail +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row); +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row); + +explain vectorization detail +select * from mfgr_brand_price_view; +select * from mfgr_brand_price_view; + +-- 24. testLateralViews +explain vectorization detail +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row); +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row); + +-- 25. 
testMultipleInserts3SWQs +CREATE TABLE part_1( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE); + +CREATE TABLE part_2( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +cud INT, +s2 DOUBLE, +fv1 INT); + +CREATE TABLE part_3( +p_mfgr STRING, +p_name STRING, +p_size INT, +c INT, +ca INT, +fv INT); + +explain vectorization detail +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +select * from part_1; + +select * from part_2; + +select * from part_3; + +-- 26. testGroupByHavingWithSWQAndAlias +explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; +select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; + +-- 27. 
testMultipleRangeWindows +explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following); + +-- 28. testPartOrderInUDAFInvoke +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part; +select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part; + +-- 29. testPartOrderInWdwDef +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following); + +-- 30. testDefaultPartitioningSpecRules +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name); + +-- 31. testWindowCrossReference +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1; +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1; + + +-- 32. testWindowInheritance +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row); + + +-- 33. 
testWindowForwardReference +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); + + +-- 34. testWindowDefinitionPropagation +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); + +-- 35. testDistinctWithWindowing +explain vectorization detail +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 36. testRankWithPartitioning +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part; +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part; + +-- 37. testPartitioningVariousForms +explain vectorization detail +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part; +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part; + +-- 38. 
testPartitioningVariousForms2 +explain vectorization detail +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part; +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part; + +-- 39. testUDFOnOrderCols +explain vectorization detail +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part; +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part; + +-- 40. testNoBetweenForRows +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part ; +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part ; + +-- 41. testNoBetweenForRange +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part ; + +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part ; + +-- 42. testUnboundedFollowingForRows +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part ; +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part ; + +-- 43. testUnboundedFollowingForRange +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part ; +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part ; + +-- 44. testOverNoPartitionSingleAggregate +explain vectorization detail +select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name; +select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name; + +-- 45. empty partition test +explain vectorization detail +select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +; +select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +; + +-- 46. 
window sz is same as partition sz +explain vectorization detail +select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1'; +select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1'; + +-- 47. empty partition +explain vectorization detail +select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1'; +select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1'; diff --git ql/src/test/queries/clientpositive/vector_windowing_expressions.q ql/src/test/queries/clientpositive/vector_windowing_expressions.q new file mode 100644 index 0000000..6654112 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_expressions.q @@ -0,0 +1,92 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; + +explain vectorization detail +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; + +explain vectorization detail +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100; +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100; +explain vectorization detail +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100; +select s, i, i - lead(i, 3, 0) over (partition by si 
order by i,s) from over10k limit 100; +explain vectorization detail +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100; +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100; +explain vectorization detail +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100; +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100; + +explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; + +explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; + +-- multi table insert test +create table t1 (a1 int, b1 string); +create table t2 (a1 int, b1 string); +explain vectorization detail +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +select * from t1 limit 3; +select * from t2 limit 3; + +explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11; +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11; diff --git ql/src/test/queries/clientpositive/vector_windowing_gby.q ql/src/test/queries/clientpositive/vector_windowing_gby.q new file mode 100644 index 0000000..7fd18aa --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_gby.q @@ -0,0 +1,19 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +set hive.mapred.mode=nonstrict; + +explain vectorization detail + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +; + + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +; diff --git ql/src/test/queries/clientpositive/vector_windowing_gby2.q ql/src/test/queries/clientpositive/vector_windowing_gby2.q new file mode 100644 index 0000000..8025b5d --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_gby2.q @@ -0,0 +1,46 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +set hive.mapred.mode=nonstrict; + +explain vectorization detail +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key; + +select rank() over (order by 
sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key; + +explain vectorization detail +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int); + +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int); + +explain vectorization detail +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value; + +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value; + +explain vectorization detail +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean; + +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean; diff --git ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q new file mode 100644 index 0000000..1cefd78 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q @@ -0,0 +1,71 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k limit 100; +select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k limit 100; + +explain vectorization detail +select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck'; +select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck'; + +explain vectorization detail +select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' ; +select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' ; + +explain vectorization detail +select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck'; +select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck'; + +explain vectorization detail +select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s 
= 'bob steinbeck'; +select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck'; + +explain vectorization detail +select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +; +select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +; diff --git ql/src/test/queries/clientpositive/vector_windowing_order_null.q ql/src/test/queries/clientpositive/vector_windowing_order_null.q new file mode 100644 index 0000000..5098a55 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_order_null.q @@ -0,0 +1,56 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; +load data local inpath '../../data/files/over4_null' into table over10k; + +explain vectorization detail +select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10; +select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10; + +explain vectorization detail +select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10; +select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10; + +explain vectorization detail +select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10; +select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10; + +explain vectorization detail +select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10; +select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10; + +explain vectorization detail +select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3; +select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3; + +explain vectorization detail +select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5; +select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5; + +explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5; +select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5; + +explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5; +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k 
window w1 as (partition by s order by i nulls last) limit 5; diff --git ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q new file mode 100644 index 0000000..6206b22 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q @@ -0,0 +1,66 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select first_value(t) over ( partition by si order by i, b ) from over10k limit 100; +select first_value(t) over ( partition by si order by i, b ) from over10k limit 100; + +explain vectorization detail +select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100; +select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100; + +explain vectorization detail +select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100; +select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100; + +explain vectorization detail +select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100; +select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100; +select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100; + +explain vectorization detail +select CAST(s as CHAR(12)), rank() over (partition by 
i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; +select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; + +explain vectorization detail +select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; +select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; diff --git ql/src/test/queries/clientpositive/vector_windowing_rank.q ql/src/test/queries/clientpositive/vector_windowing_rank.q new file mode 100644 index 0000000..324e3b6 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_rank.q @@ -0,0 +1,115 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select s, rank() over (partition by f order by t) from over10k limit 100; +select s, rank() over (partition by f order by t) from over10k limit 100; + +explain vectorization detail +select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100; +select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100; + +explain vectorization detail +select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100; +select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100; + +explain vectorization detail +select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100; +select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100; + +-- If the following tests fail, look for the comments in class PTFPPD::process() + +explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10; +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10; + +explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10; +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10; + +explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on
(other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10; +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10; + diff --git ql/src/test/queries/clientpositive/vector_windowing_streaming.q ql/src/test/queries/clientpositive/vector_windowing_streaming.q new file mode 100644 index 0000000..1601eec --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_streaming.q @@ -0,0 +1,83 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +set hive.limit.pushdown.memory.usage=.8; + +-- part tests +explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +; + +explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4; + +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4; + +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 2; + +-- over10k tests +explain vectorization detail +select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5; + +select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5; + +select * +from (select t, f, row_number() over(partition by t order by f) r from over10k) a +where r < 8 and t < 0; + +set hive.vectorized.execution.enabled=false; +set hive.limit.pushdown.memory.usage=0.8; + +explain vectorization detail +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; + +drop table if exists sB; +create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; + +select * from sB +where ctinyint is null; + +set hive.vectorized.execution.enabled=true; +set hive.limit.pushdown.memory.usage=0.8; +drop table if exists sD; + +explain vectorization detail +create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; +create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; + +select * from sD +where ctinyint is null; diff --git ql/src/test/queries/clientpositive/vector_windowing_windowspec.q ql/src/test/queries/clientpositive/vector_windowing_windowspec.q new file mode 100644 index 0000000..6fed729 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_windowspec.q @@ -0,0 +1,68 @@ +set hive.cli.print.header=true; +SET 
hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100; +select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100; + +explain vectorization detail +select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100; +select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100; + +explain vectorization detail +select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100; +select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100; + +explain vectorization detail +select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100; +select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100; + +explain vectorization detail +select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100; +select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100; + +explain vectorization detail +select s, sum(i) over(partition by ts order by s) from over10k limit 100; +select s, sum(i) over(partition by ts order by s) from over10k limit 100; + +explain vectorization detail +select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100; +select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100; +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100; + +explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; +select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; + +explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; + +set hive.cbo.enable=false; +-- HIVE-9228 +explain vectorization detail +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7; +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7; diff --git ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q new file mode 100644 index 0000000..7d94195 --- /dev/null +++ 
ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q @@ -0,0 +1,35 @@ +-- Test small dataset with larger windowing + +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +drop table if exists smalltable_windowing; + +create table smalltable_windowing( + i int, + type string); +insert into smalltable_windowing values(3, 'a'), (1, 'a'), (2, 'a'); + +explain vectorization detail +select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing; + +select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing; diff --git ql/src/test/results/clientpositive/correlationoptimizer12.q.out ql/src/test/results/clientpositive/correlationoptimizer12.q.out index 23443ee..ee9a6e7 100644 --- ql/src/test/results/clientpositive/correlationoptimizer12.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -57,7 +57,7 @@ STAGE PLANS: arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), count_window_0 (type: bigint) @@ -142,7 +142,7 @@ STAGE PLANS: arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), count_window_0 (type: bigint) diff --git ql/src/test/results/clientpositive/ctas_colname.q.out ql/src/test/results/clientpositive/ctas_colname.q.out index b0cab7e..8d61c9d 100644 --- ql/src/test/results/clientpositive/ctas_colname.q.out +++ ql/src/test/results/clientpositive/ctas_colname.q.out @@ -190,7 +190,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE
Column stats: NONE Select Operator @@ -354,7 +354,7 @@ STAGE PLANS: arguments: _col0, 1 name: lead window function: GenericUDAFLeadEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/distinct_windowing.q.out ql/src/test/results/clientpositive/distinct_windowing.q.out index 1605a62..197687a 100644 --- ql/src/test/results/clientpositive/distinct_windowing.q.out +++ ql/src/test/results/clientpositive/distinct_windowing.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: first_value_window_0 (type: tinyint) @@ -208,7 +208,7 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int) @@ -330,13 +330,13 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: first_value_window_1 arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) diff --git ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out index aac939f..85d0777 100644 --- ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out +++ ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: first_value_window_0 (type: tinyint) @@ -208,7 +208,7 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int) @@ -330,13 +330,13 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: first_value_window_1 arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int), 
first_value_window_1 (type: tinyint) @@ -540,7 +540,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -680,7 +680,7 @@ STAGE PLANS: arguments: _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 4fc36ed..32135e4 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -110,7 +110,7 @@ STAGE PLANS: arguments: _col3 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/llap/groupby_resolution.q.out ql/src/test/results/clientpositive/llap/groupby_resolution.q.out index f2a6ab0..bf3d8e9 100644 --- ql/src/test/results/clientpositive/llap/groupby_resolution.q.out +++ ql/src/test/results/clientpositive/llap/groupby_resolution.q.out @@ -720,7 +720,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index fbaf1e6..7fa1719 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -96,21 +96,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -306,7 +306,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -585,21 +585,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: 
GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -765,21 +765,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -957,21 +957,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1460,7 +1460,7 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1636,21 +1636,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -1812,21 +1812,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2049,21 +2049,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2235,13 +2235,13 @@ STAGE PLANS: arguments: _col5 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double) @@ -2453,33 +2453,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window 
frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2819,7 +2819,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~CURRENT + window frame: ROWS PRECEDING(2)~CURRENT Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double) @@ -3041,21 +3041,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -3094,7 +3094,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(5)~CURRENT + window frame: RANGE PRECEDING(5)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sum_window_0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) @@ -3131,28 +3131,28 @@ STAGE PLANS: arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) @@ -3492,21 +3492,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank 
window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -3763,21 +3763,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4009,21 +4009,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4293,21 +4293,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 
_col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4558,21 +4558,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) @@ -4810,21 +4810,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/ptf_streaming.q.out ql/src/test/results/clientpositive/llap/ptf_streaming.q.out index 6013c11..18074eb 100644 --- ql/src/test/results/clientpositive/llap/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/llap/ptf_streaming.q.out @@ -96,21 +96,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -306,7 +306,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true 
Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -632,7 +632,7 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -808,21 +808,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1045,21 +1045,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1284,21 +1284,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1523,21 +1523,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: 
dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1743,33 +1743,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2022,21 +2022,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -2293,21 +2293,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -2547,21 +2547,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index 58e78c4..057bb2a 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -346,7 +346,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -520,7 +520,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index d89361d..3f5bd38 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -450,7 +450,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -512,7 +512,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -742,7 +742,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -803,7 +803,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: 
true Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1041,7 +1041,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1180,7 +1180,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1245,7 +1245,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index b2b5458..dc052ac 100644 --- ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1288,7 +1288,7 @@ STAGE PLANS: arguments: _col5 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: first_value_window_0 (type: int) @@ -1356,7 +1356,7 @@ STAGE PLANS: arguments: _col5 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: first_value_window_0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index 678db83..5d0b23c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) @@ -113,7 +113,7 @@ STAGE PLANS: arguments: _col3 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out index 95dcba9..8d40a6d 100644 --- ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out @@ -111,24 +111,24 @@ Stage-0 Reducer 2 llap File Output Operator [FS_10] Merge Join Operator [MERGEJOIN_21] 
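The subquery_in and subquery_notin hunks are the same mechanical change applied to rank(): ranking and navigation evaluators (rank, dense_rank, row_number, lag) ignore any user-specified frame and are evaluated once over the entire partition, so their frame now prints as ROWS PRECEDING(MAX)~FOLLOWING(MAX); isPivotResult: true means the evaluator returns the whole partition's results as one list that is then pivoted back onto the individual rows. The query shape that produces this, with hypothetical names:

    -- Hypothetical table part_h(mfgr, price). Adding a frame clause to rank()
    -- would not change the plan: the whole-partition ROWS frame is always used.
    SELECT mfgr, price,
           rank() OVER (PARTITION BY mfgr ORDER BY price) AS rnk
    FROM part_h;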
(rows=2 width=431) - Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_23] + SHUFFLE [RS_6] PartitionCols:_col2 - Select Operator [SEL_22] (rows=2 width=134) + Select Operator [SEL_2] (rows=2 width=134) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=2 width=236) default@char_tbl1,c1,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] - Dynamic Partitioning Event Operator [EVENT_26] (rows=1 width=134) - Group By Operator [GBY_25] (rows=1 width=134) + Dynamic Partitioning Event Operator [EVENT_20] (rows=1 width=134) + Group By Operator [GBY_19] (rows=1 width=134) Output:["_col0"],keys:_col0 - Select Operator [SEL_24] (rows=2 width=134) + Select Operator [SEL_18] (rows=2 width=134) Output:["_col0"] - Please refer to the previous Select Operator [SEL_22] + Please refer to the previous Select Operator [SEL_2] <-Map 3 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_28] + SHUFFLE [RS_7] PartitionCols:_col2 - Select Operator [SEL_27] (rows=2 width=89) + Select Operator [SEL_5] (rows=2 width=89) Output:["_col0","_col1","_col2"] TableScan [TS_3] (rows=2 width=190) default@char_tbl2,c2,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] diff --git ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out index 6edc474..10051d1 100644 --- ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out +++ ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out @@ -21,7 +21,7 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cint) IN (ctinyint, cbigint) (type: boolean) + predicate: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean) Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) @@ -40,7 +40,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. 
Column type is int but the common type is bigint + notVectorizedReason: Predicate expression for FILTER operator: Vectorizing IN expression only supported for constant values vectorized: false Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out new file mode 100644 index 0000000..b87378d --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -0,0 +1,1390 @@ +PREHOOK: query: DROP TABLE IF EXISTS e011_01 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_01 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS e011_02 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_02 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS e011_03 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS e011_03 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_01 +POSTHOOK: query: CREATE TABLE e011_01 ( + c1 decimal(15,2), + c2 decimal(15,2)) + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_01 +PREHOOK: query: CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_02 +POSTHOOK: query: CREATE TABLE e011_02 ( + c1 decimal(15,2), + c2 decimal(15,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_02 +PREHOOK: query: CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e011_03 +POSTHOOK: query: CREATE TABLE e011_03 ( + c1 decimal(15,2), + c2 decimal(15,2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e011_03 +PREHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@e011_01 +POSTHOOK: query: LOAD DATA + LOCAL INPATH '../../data/files/e011_01.txt' + OVERWRITE + INTO TABLE e011_01 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@e011_01 +PREHOOK: query: INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_02 +POSTHOOK: query: INSERT INTO TABLE e011_02 + SELECT c1, c2 + FROM e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_02 +POSTHOOK: Lineage: e011_02.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] +POSTHOOK: Lineage: e011_02.c2 SIMPLE [(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] +c1 c2 +PREHOOK: query: INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_03 +POSTHOOK: query: INSERT INTO TABLE e011_03 + SELECT c1, c2 + FROM e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_03 +POSTHOOK: Lineage: e011_03.c1 SIMPLE [(e011_01)e011_01.FieldSchema(name:c1, type:decimal(15,2), comment:null), ] +POSTHOOK: Lineage: e011_03.c2 SIMPLE 
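The vector_non_constant_in_expr change just above replaces a cast-specific failure message with the general rule: the vectorizer supports IN() only when the value list consists of constants, whether or not implicit casts (the UDFToInteger calls now shown in the predicate) would be needed. Illustrated against the alltypesorc test table:

    -- Vectorizes: the IN list is constant.
    SELECT COUNT(*) FROM alltypesorc WHERE cint IN (1, 2);
    -- Does not vectorize: the IN list contains columns, so the FILTER stays in
    -- row mode with the new notVectorizedReason; the planner inserts the
    -- UDFToInteger casts automatically.
    SELECT COUNT(*) FROM alltypesorc WHERE cint IN (ctinyint, cbigint);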
[(e011_01)e011_01.FieldSchema(name:c2, type:decimal(15,2), comment:null), ] +c1 c2 +PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_01 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_02 +PREHOOK: Output: default@e011_02 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_02 +POSTHOOK: Output: default@e011_02 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_03 +PREHOOK: Output: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_03 +POSTHOOK: Output: default@e011_03 +#### A masked pattern was here #### +_c0 _c1 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over() from e011_01 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(25,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: ConstantVectorExpression(val 0) -> 1:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(35,2), bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col0 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 1] + functionNames: [sum] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1] + orderExpressions: [ConstantVectorExpression(val 0) -> 3:long] + outputColumns: [2, 1] + outputTypes: [decimal(35,2), decimal(25,2)] + streamingColumns: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over() from e011_01 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over() from e011_01 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +#### A masked pattern was here #### +_c0 +16.00 +PREHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: c1, c2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c1) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 0) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] + keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Execution mode: 
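The first e011_01 plan above shows how a window function over an aggregate is evaluated: the inner sum(c1) is an ordinary two-stage aggregation (Map 1 in hash mode, Reducer 2 in mergepartial mode), and only its single result row reaches the PTF in Reducer 3. Because the OVER clause is empty, the shuffle into Reducer 3 keys on a constant (ConstantVectorExpression(val 0) serves as both partition and order expression) so that every row lands in one window partition. Note also the decimal widening visible in the plan: sum over decimal(15,2) produces decimal(25,2), and the windowed sum widens again to decimal(35,2), since Hive gives each sum ten extra digits of precision (capped at 38). The query is equivalent to windowing over a subquery; the alias t below is hypothetical:

    SELECT SUM(s) OVER ()                       -- Reducer 3: the PTF stage
    FROM (SELECT SUM(c1) AS s FROM e011_01) t;  -- Map 1 + Reducer 2: the GROUP BY stage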
vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(35,2) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 
+ name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(c1)) over( + partition by c2 order by c1) + from e011_01 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 4 
Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Map 5 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: 
Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(35,2) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: 
GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_01.c2 order by e011_01.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_01.c1, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)) + outputColumnNames: 
_col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Map 5 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 _col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + vectorOutput: false + 
native: false + projectedOutputColumns: null + keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:decimal(15,2), KEY._col1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDecimal(col 2) -> decimal(38,18) + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(25,2)) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:decimal(15,2), KEY.reducesinkkey1:decimal(15,2), VALUE._col0:decimal(25,2) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(35,2) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: decimal(25,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC 
NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 0, 2] + outputTypes: [decimal(35,2), decimal(15,2), decimal(15,2), decimal(25,2)] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: decimal(35,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(sum(e011_01.c1)) over( + partition by e011_03.c2 order by e011_03.c1) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c1, e011_03.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +_c0 +1.00 +3.00 +5.00 +7.00 +PREHOOK: query: explain vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e011_01 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: c1 is not null (type: boolean) + 
Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Map 4 + Map Operator Tree: + TableScan + alias: e011_03 + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: c1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(15,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c1:decimal(15,2), c2:decimal(15,2) + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: decimal(15,2)) + 1 
_col0 (type: decimal(15,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 1792 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: corr(_col0, _col2) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(15,2)) + Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF corr not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: corr(VALUE._col0) + keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: decimal(15,2), _col1: decimal(15,2), _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 2 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e011_01 +PREHOOK: Input: default@e011_03 +#### A masked pattern was here #### +POSTHOOK: query: select sum(corr(e011_01.c1, e011_03.c1)) + over(partition by e011_01.c2 order by e011_03.c2) + from e011_01 + join e011_03 on e011_01.c1 = e011_03.c1 + group by e011_03.c2, e011_01.c2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e011_01 +POSTHOOK: Input: default@e011_03 +#### A masked pattern was here #### +sum_window_0 +NULL +NULL 
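Two details stand out in this corr variant: Reducer 3 stays in row mode (notVectorizedReason: UDF corr not supported), because corr has no vectorized GROUP BY implementation, and every result row here and below is NULL. The NULLs are expected: each (e011_03.c2, e011_01.c2) group contains a single joined row, the correlation of a single pair is undefined (zero variance in each argument), so corr returns NULL, and sum over nothing but NULLs is NULL. A minimal illustration:

    -- One row per group, so corr has zero variance in each argument and
    -- returns NULL; any SUM window over those NULLs is also NULL.
    SELECT corr(c1, c2) FROM e011_01 GROUP BY c1, c2;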
+NULL +NULL diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 9929550..dc098b5 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -162,16 +162,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double, double, bigint, bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -190,53 +203,72 @@ STAGE PLANS: alias: row_number_window_0 name: row_number window function: GenericUDAFRowNumberEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: rank_window_1 arguments: _col0 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col0 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_3 arguments: _col2 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: last_value_window_4 arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: count_window_5 arguments: _col2 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: count_window_6 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isStar: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRowNumber, VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorDoubleFirstValue, VectorPTFEvaluatorDoubleLastValue, VectorPTFEvaluatorCount, VectorPTFEvaluatorCountStar] + functionInputExpressions: [null, col 0, col 0, col 2, col 2, col 2, null] + functionNames: 
[row_number, rank, dense_rank, first_value, last_value, count, count] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 4, 5, 6, 7, 8, 9, 0, 1, 2] + outputTypes: [int, int, int, double, double, bigint, bigint, string, string, double] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -387,16 +419,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double, double, bigint, bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -415,53 +460,73 @@ STAGE PLANS: alias: row_number_window_0 name: row_number window function: GenericUDAFRowNumberEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: rank_window_1 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_3 arguments: _col2 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: last_value_window_4 arguments: _col2 
name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: count_window_5 arguments: _col2 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: count_window_6 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT isStar: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRowNumber, VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorDoubleFirstValue, VectorPTFEvaluatorDoubleLastValue, VectorPTFEvaluatorCount, VectorPTFEvaluatorCountStar] + functionInputExpressions: [null, col 1, col 1, col 2, col 2, col 2, null] + functionNames: [row_number, rank, dense_rank, first_value, last_value, count, count] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 6, 7, 8, 9, 0, 1, 2] + outputTypes: [int, int, int, double, double, bigint, bigint, string, string, double] + partitionExpressions: [col 0] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -616,7 +681,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: row_number only CURRENT ROW end frame is supported for RANGE vectorized: false Reduce Operator Tree: Select Operator @@ -640,45 +705,45 @@ STAGE PLANS: alias: row_number_window_0 name: row_number window function: GenericUDAFRowNumberEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: rank_window_1 arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_3 arguments: _col2 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: 
PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: last_value_window_4 arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: count_window_5 arguments: _col2 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: count_window_6 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT isStar: true Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -831,16 +896,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -860,32 +938,51 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum, VectorPTFEvaluatorDoubleMin, VectorPTFEvaluatorDoubleMax, VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + streamingColumns: [] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 
(type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1024,16 +1121,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -1053,32 +1163,52 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum, VectorPTFEvaluatorDoubleMin, VectorPTFEvaluatorDoubleMax, VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [col 0] + streamingColumns: [] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1217,16 +1347,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -1246,32 +1389,52 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum, VectorPTFEvaluatorDoubleMin, VectorPTFEvaluatorDoubleMax, VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [col 0] + streamingColumns: [] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1414,7 +1577,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -1439,25 +1602,25 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) @@ -1645,16 +1808,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -1674,32 +1850,51 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumHiveDecimal - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDecimal - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum, VectorPTFEvaluatorDecimalMin, VectorPTFEvaluatorDecimalMax, VectorPTFEvaluatorDecimalAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18), string, string, decimal(38,18)] + streamingColumns: [] Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1838,16 +2033,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -1867,32 +2075,52 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumHiveDecimal - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: 
RANGE PRECEDING(MAX)~CURRENT window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDecimal - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum, VectorPTFEvaluatorDecimalMin, VectorPTFEvaluatorDecimalMax, VectorPTFEvaluatorDecimalAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18), string, string, decimal(38,18)] + partitionExpressions: [col 0] + streamingColumns: [] Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2051,16 +2279,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2080,32 +2321,51 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) window function definition alias: avg_window_3 
arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum, VectorPTFEvaluatorLongMin, VectorPTFEvaluatorLongMax, VectorPTFEvaluatorLongAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [bigint, bigint, bigint, double, string, string, bigint] + streamingColumns: [] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2244,16 +2504,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2273,32 +2546,52 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: min_window_1 arguments: _col2 name: min window function: GenericUDAFMinEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: max_window_2 arguments: _col2 name: max window function: GenericUDAFMaxEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: avg_window_3 arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + 
evaluatorClasses: [VectorPTFEvaluatorLongSum, VectorPTFEvaluatorLongMin, VectorPTFEvaluatorLongMax, VectorPTFEvaluatorLongAvg] + functionInputExpressions: [col 2, col 2, col 2, col 2] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [bigint, bigint, bigint, double, string, string, bigint] + partitionExpressions: [col 0] + streamingColumns: [] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2431,16 +2724,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2460,15 +2766,34 @@ STAGE PLANS: arguments: _col0 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 0] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1] + orderExpressions: [col 0] + outputColumns: [2, 0, 1] + outputTypes: [int, string, double] + streamingColumns: [2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 
Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2595,16 +2920,29 @@ STAGE PLANS: dataColumns: p_mfgr:string, p_name:string, p_retailprice:double partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2624,15 +2962,35 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 0, 1, 2] + outputTypes: [int, string, string, double] + partitionExpressions: [col 0] + streamingColumns: [3] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2765,7 +3123,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank vectorized: false Reduce Operator Tree: Select Operator @@ -2790,7 +3148,7 @@ STAGE PLANS: arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2927,16 +3285,29 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: bigint, timestamp, timestamp Reducer 2 - Execution mode: llap + Execution mode: 
vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, KEY.reducesinkkey2:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, timestamp Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2956,15 +3327,35 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 2] + functionNames: [rank] + keyInputColumns: [0, 2] + native: true + nonKeyInputColumns: [3] + orderExpressions: [col 2] + outputColumns: [4, 0, 2, 3] + outputTypes: [int, string, string, double] + partitionExpressions: [col 0, VectorUDFAdaptor(CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END)(children: StringGroupColEqualStringScalar(col 0, val Manufacturer#2) -> 5:boolean) -> 6:timestamp] + streamingColumns: [4] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4] Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out index c036d69..6b4ca6c 100644 --- ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out @@ -62,28 +62,44 @@ STAGE PLANS: TableScan alias: orc_decimal Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator - predicate: (id) IN ('100000000', '200000000') (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDoubleColumnInList(col 1, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0) -> 1:double) -> boolean + predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean) Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: id (type: decimal(18,0)) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is decimal(18,0) but the common type is string - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out new file mode 100644 index 0000000..97bbc93 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -0,0 +1,9608 @@ +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over 
(distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr s1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + outputColumnNames: p_name, p_mfgr, p_size, 
p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 5, 7] + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 1, col 5 + native: false + projectedOutputColumns: [0] + keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: lag_window_2 + arguments: _col2, 1, _col2 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size _c3 r dr p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond 
aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 5, val 0) -> boolean + predicate: (p_size > 0) (type: boolean) + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 1, col 5 + native: false + projectedOutputColumns: [0] + keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + 
partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: lag_window_2 + arguments: _col2, 1, _col2 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size _c3 r dr p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 2 
1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: count_window_0 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorCount] + functionInputExpressions: [col 2] + functionNames: [count] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 0, 2] + outputTypes: [bigint, string, string, int] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked 
pattern was here #### +p_mfgr p_name cd +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique chartreuse lavender yellow 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 +Manufacturer#1 almond aquamarine burnished black steel 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 +Manufacturer#2 almond antique violet chocolate turquoise 1 +Manufacturer#2 almond antique violet turquoise frosted 2 +Manufacturer#2 almond aquamarine midnight light salmon 3 +Manufacturer#2 almond aquamarine rose maroon antique 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 +Manufacturer#3 almond antique chartreuse khaki white 1 +Manufacturer#3 almond antique forest lavender goldenrod 2 +Manufacturer#3 almond antique metallic orange dim 3 +Manufacturer#3 almond antique misty red olive 4 +Manufacturer#3 almond antique olive coral navajo 5 +Manufacturer#4 almond antique gainsboro frosted violet 1 +Manufacturer#4 almond antique violet mint lemon 2 +Manufacturer#4 almond aquamarine floral ivory bisque 3 +Manufacturer#4 almond aquamarine yellow dodger mint 4 +Manufacturer#4 almond azure aquamarine papaya violet 5 +Manufacturer#5 almond antique blue firebrick mint 1 +Manufacturer#5 almond antique medium spring khaki 2 +Manufacturer#5 almond antique sky peru orange 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 +Manufacturer#5 almond azure blanched chiffon midnight 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: count_window_2 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_3 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: lag_window_4 + arguments: _col5, 1, _col5 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 6734 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6734 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name r dr cd p_retailprice s1 p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 2346.3 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 3 1753.76 4100.06 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 4 1602.59 5702.65 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 5 4 5 1414.42 7117.07 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 6 1632.66 8749.73 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.36 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.62 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 +PREHOOK: query: explain vectorization detail +select sub1.r, 
sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data 
size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: count_window_2 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_3 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: lag_window_4 + arguments: _col5, 1, _col5 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), round(sum_window_3, 2) (type: double), (_col5 - lag_window_4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +sub1.r sub1.dr sub1.cd sub1.s1 sub1.deltasz +1 1 1 1620.67 0 +1 1 1 1671.68 
0 +1 1 1 1690.68 0 +1 1 1 1789.69 0 +1 1 2 1173.15 0 +1 1 2 2346.3 0 +2 2 2 2861.95 -3 +2 2 2 2996.09 29 +2 2 2 3401.35 -25 +2 2 2 3491.38 26 +3 2 3 4100.06 32 +3 3 3 4202.35 -12 +3 3 3 4272.34 5 +3 3 3 5190.08 -4 +3 3 3 5523.36 -38 +4 3 4 5702.65 -28 +4 4 4 6047.27 -20 +4 4 4 6195.32 -18 +4 4 4 6208.18 44 +4 4 4 7222.02 23 +5 4 5 7117.07 22 +5 5 5 7337.62 5 +5 5 5 7532.61 44 +5 5 5 7672.66 -23 +5 5 5 8923.62 -7 +6 5 6 8749.73 14 +PREHOOK: query: explain vectorization detail +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, 
p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Map 5 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: NOOP not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: part + output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double + type: TABLE + Partition table definition + input alias: abc + name: noop + order by: _col1 ASC NULLS FIRST + output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double + partition by: _col2 + raw input shape: + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + 
outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col5 (type: int), _col7 (type: double) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: lag_window_3 + arguments: _col5, 1, _col5 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col7 (type: double), round(sum_window_2, 2) (type: double), _col5 (type: int), (_col5 - lag_window_3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 29 Data size: 7279 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 7279 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by 
abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +abc.p_mfgr abc.p_name r dr abc.p_retailprice s1 abc.p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 2346.3 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 3519.45 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 4692.6 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 5 2 1753.76 6446.36 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 1602.59 8048.95 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 7 4 1414.42 9463.37 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 1632.66 11096.03 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2031.98 5523.36 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 1290.35 7337.62 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.35 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 1464.48 7672.66 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +POSTHOOK: type: QUERY 
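(Reviewer note on the plan that follows; not part of the generated q.out. Hive accepts `distribute by`/`sort by` inside an OVER clause as synonyms for the standard `partition by`/`order by`, so the multi-key rank query below could equivalently be written as in this illustrative sketch — an assumption about spelling only, not taken from the patch or the .q file:)

    -- Illustrative equivalent of the query explained below, using the
    -- standard window syntax instead of distribute by / sort by:
    select p_mfgr, p_name, p_size,
           rank() over (partition by p_mfgr
                        order by p_name, p_size desc) as r
    from part;

As the explain output below shows, this plan stays in row mode: the vectorizer reports "More than 1 argument expression of aggregation function rank", since Hive passes both ORDER BY keys (_col1, _col5) to the rank evaluator as arguments.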
+Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1, _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: 
p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr s1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr s1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY 
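
A pattern worth calling out before the next plan: the reducers in this file only get the new vectorized PTF path when every window function uses a frame the vectorizer accepts; otherwise the plan names the offending frame in notVectorizedReason ("sum UNBOUNDED end frame is not supported for ROWS window type" above, "sum only UNBOUNDED start frame is supported" below). A minimal pair, sketched against the same part table this test uses, that separates the two paths:

    -- Expected to vectorize: rank/dense_rank frames are handled natively
    -- (compare the plan later in this file that prints
    -- "className: VectorPTFOperator").
    explain vectorization detail
    select p_mfgr, p_name,
           rank()       over (distribute by p_mfgr sort by p_name) as r,
           dense_rank() over (distribute by p_mfgr sort by p_name) as dr
    from part;

    -- Expected to fall back to the row-mode PTF operator: the frame start
    -- is bounded (2 preceding), which explain reports as
    -- "only UNBOUNDED start frame is supported".
    explain vectorization detail
    select p_mfgr, p_name,
           sum(p_size) over (distribute by p_mfgr sort by p_name
                             rows between 2 preceding and 2 following) as s
    from part;
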
+POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS CURRENT~CURRENT + window function definition + alias: first_value_window_1 + arguments: _col5 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: last_value_window_2 + arguments: _col5, false + 
name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), first_value_window_1 (type: int), last_value_window_2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s2 f l +Manufacturer#1 almond antique burnished rose metallic 2 2 2 34 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 6 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 2 28 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 2 42 +Manufacturer#1 almond aquamarine burnished black steel 28 28 34 42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2 +Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 40 18 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1 +Manufacturer#3 almond antique metallic orange dim 19 19 17 45 +Manufacturer#3 almond antique misty red olive 1 1 14 45 +Manufacturer#3 almond antique olive coral navajo 45 45 19 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 27 +Manufacturer#4 almond antique violet mint lemon 39 39 10 7 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 10 12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 39 12 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 27 12 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 2 +Manufacturer#5 almond antique medium spring khaki 6 6 31 46 +Manufacturer#5 almond antique sky peru orange 2 2 31 23 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 6 23 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 2 23 +PREHOOK: query: explain 
vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val Manufacturer#3) -> boolean + predicate: (p_mfgr = 'Manufacturer#3') (type: boolean) + Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'Manufacturer#3' (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: 'Manufacturer#3' (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val Manufacturer#3) -> 9:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, string + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey1 (type: string), VALUE._col4 (type: int) + outputColumnNames: _col1, _col5 + Statistics: Num rows: 5 Data size: 1965 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: 'Manufacturer#3' + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS CURRENT~CURRENT + window function definition + alias: first_value_window_2 + arguments: _col5 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: last_value_window_3 + arguments: _col5, false + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 5 Data size: 1965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'Manufacturer#3' (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: bigint), first_value_window_2 (type: int), last_value_window_3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 5 Data size: 1215 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 1215 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r s2 f l +Manufacturer#3 almond antique chartreuse khaki white 17 1 17 17 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 14 17 1 +Manufacturer#3 almond antique metallic orange dim 19 3 19 17 45 +Manufacturer#3 almond antique misty red olive 1 4 1 14 45 +Manufacturer#3 almond antique olive coral navajo 45 5 45 19 45 +PREHOOK: query: 
explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + 
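
One more observation, ahead of the sum_window_1 definition that follows: its ROWS CURRENT~CURRENT frame contains exactly one row, so the windowed sum degenerates to the row's own value, and the result sets in this file bear that out (the s2 column always equals p_size). Under that reading, a rewrite that avoids the bounded frame, and with it the vectorization fallback, altogether:

    -- sum(p_size) over (... rows between current row and current row)
    -- sums a single-row frame, so it returns the row's own p_size
    -- (as a bigint, since sum widens the type).
    select p_mfgr, p_name, p_size,
           cast(p_size as bigint) as s2
    from part;
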
window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS CURRENT~CURRENT + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 +Manufacturer#1 almond antique burnished rose metallic 2 38 2 +Manufacturer#1 almond antique burnished rose metallic 2 44 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 6 +Manufacturer#1 almond aquamarine burnished black steel 28 110 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 +Manufacturer#2 almond antique violet turquoise frosted 40 81 40 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 2 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 18 +Manufacturer#3 almond antique chartreuse khaki white 17 50 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 14 +Manufacturer#3 almond antique metallic orange dim 19 96 19 +Manufacturer#3 almond antique misty red olive 1 79 1 +Manufacturer#3 almond antique olive coral navajo 45 65 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 10 +Manufacturer#4 almond antique violet mint lemon 39 83 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 27 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 7 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 12 +Manufacturer#5 almond antique blue firebrick mint 31 39 31 +Manufacturer#5 almond antique medium spring khaki 6 85 6 +Manufacturer#5 almond antique sky peru orange 2 108 2 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 23 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr 
+from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window 
function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank] + functionInputExpressions: [col 1, col 1] + functionNames: [rank, dense_rank] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 1, 0, 2] + outputTypes: [int, int, string, string, int] + partitionExpressions: [col 0] + streamingColumns: [3, 4] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr +Manufacturer#1 almond antique burnished rose metallic 2 1 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 +Manufacturer#3 almond antique metallic orange dim 19 3 3 +Manufacturer#3 almond antique misty red olive 1 4 4 +Manufacturer#3 almond antique 
olive coral navajo 45 5 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 +Manufacturer#4 almond antique violet mint lemon 39 2 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 +Manufacturer#5 almond antique medium spring khaki 6 2 2 +Manufacturer#5 almond antique sky peru orange 2 3 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: cume_dist_window_2 + arguments: _col1 + name: cume_dist + window function: GenericUDAFCumeDistEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: percent_rank_window_3 + arguments: _col1 + name: percent_rank + window function: GenericUDAFPercentRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: ntile_window_4 + arguments: 3 + name: ntile + window function: GenericUDAFNTileEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: count_window_5 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_6 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: stddev_window_7 + arguments: _col5 + name: stddev + window function: GenericUDAFStdEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: first_value_window_8 + arguments: (_col5 % 5) + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_9 + arguments: 
_col5 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, percent_rank_window_3, ntile_window_4, count_window_5, avg_window_6, stddev_window_7, first_value_window_8, last_value_window_9, _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col5 (type: int) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: first_value only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: int), VALUE._col9 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11, _col12, _col15 + Statistics: Num rows: 26 Data size: 14326 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: double, _col3: double, _col4: int, _col5: bigint, _col6: double, _col7: double, _col8: int, _col9: int, _col11: string, _col12: string, _col15: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col12 ASC NULLS FIRST, _col11 ASC NULLS FIRST + partition by: _col12 + raw input shape: + window functions: + window function definition + alias: first_value_window_10 + arguments: _col15 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 14326 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col12 (type: string), _col11 (type: string), _col15 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: int), _col9 (type: int), 
first_value_window_10 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 26 Data size: 7462 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 7462 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr cud pr nt ca avg st fv lv fvw1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 0.4 2 3 12.666666666666666 15.084944665313014 2 34 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 0.6 2 4 11.0 13.379088160259652 2 6 2 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 0.8 3 5 14.4 13.763720427268202 2 28 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 1.0 3 6 19.0 16.237815945091466 2 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 0.0 1 1 14.0 0.0 4 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 0.25 1 2 27.0 13.0 4 40 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 0.5 2 3 18.666666666666668 15.86050300449376 4 2 
14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 0.75 2 4 20.25 14.00669482783144 4 25 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 1.0 3 5 19.8 12.560254774486067 4 18 2 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 0.0 1 1 17.0 0.0 2 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 0.25 1 2 15.5 1.5 2 14 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 0.5 2 3 16.666666666666668 2.0548046676563256 2 19 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0.8 0.75 2 4 12.75 7.013380069552769 2 1 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 1.0 3 5 19.2 14.344336861632886 2 45 19 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 0.0 1 1 10.0 0.0 0 10 10 +Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 0.25 1 2 24.5 14.5 0 39 10 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 0.5 2 3 25.333333333333332 11.897712198383164 0 27 10 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 0.75 2 4 20.75 13.007209539328564 0 7 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 1.0 3 5 19.0 12.149074038789951 0 12 27 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 0.0 1 1 31.0 0.0 1 31 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 0.25 1 2 18.5 12.5 1 6 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 0.5 2 3 13.0 12.832251036613439 1 2 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 0.75 2 4 21.25 18.102140757380052 1 46 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1.0 1.0 3 5 21.6 16.206171663906314 1 23 2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic 
stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: cume_dist_window_2
+ arguments: _col1
+ name: cume_dist
+ window function: GenericUDAFCumeDistEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_3
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), sum_window_3 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
+ outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, sum_window_3, _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col5 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), sum_window_3 (type: bigint), _col1 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col5 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9
+ Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int, _col2: double, _col3: bigint, _col5: string, _col6: string, _col9: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col9 ASC NULLS FIRST
+ partition by: _col6
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_4
+ arguments: _col9
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: RANGE PRECEDING(5)~CURRENT
+ Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: sum_window_4 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: bigint), _col5 (type: string), _col6 (type: string), _col9 (type: int)
+ outputColumnNames: sum_window_4, _col0, _col1, _col2, _col3, _col5, _col6, _col9
+ Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col6 (type: string), _col5 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col6 (type: string)
+ Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: sum_window_4 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: bigint), _col9 (type: int)
+ Reducer 4
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: first_value only UNBOUNDED start frame is supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: double), VALUE._col4 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col8 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col10
+ Statistics: Num rows: 26 Data size: 13598 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: bigint, _col1: int, _col2: int, _col3: double, _col4: bigint, _col6: string, _col7: string, _col10: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col7 ASC NULLS FIRST, _col6 ASC NULLS FIRST
+ partition by: _col7
+ raw input shape:
+ window functions:
+ window function definition
+ alias: first_value_window_5
+ arguments: _col10
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 13598 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col7 (type: string), _col6 (type: string), _col10 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: bigint), _col0 (type: bigint), first_value_window_5 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 6734 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 6734 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+ rank() over(distribute by p_mfgr sort by p_name) as r,
+ dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2,
+first_value(p_size) over w1 as fv1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+ rank() over(distribute by p_mfgr sort by p_name) as r,
+ dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2,
+first_value(p_size) over w1 as fv1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r dr cud s1 s2 fv1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 38 34 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 44 10 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 72 28 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 114 42 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 14 14 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 54 40 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 56 2 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 81 25 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 99 32 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 17 31 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 31 14 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 50 50 17
+Manufacturer#3 almond antique misty red olive 1 4 4 0.8 51 1 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 96 45 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 10 17 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 49 39 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 76 27 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 83 7 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 95 29 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 31 31 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 37 8 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 39 2 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 85 46 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1.0 108 23 2
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint, bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2]
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ isStar: true
+ window function definition
+ alias: count_window_1
+ arguments: _col5
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorCountStar, VectorPTFEvaluatorCount]
+ functionInputExpressions: [null, col 2]
+ functionNames: [count, count]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 1]
+ outputColumns: [3, 4, 1, 0, 2]
+ outputTypes: [bigint, bigint, string, string, int]
+ partitionExpressions: [col 0]
+ streamingColumns: []
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
+ outputColumnNames: count_window_0, count_window_1, _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 4, 1, 0, 2]
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int)
+ Reducer 3
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: first_value only UNBOUNDED start frame is supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col7
+ Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: bigint, _col1: bigint, _col3: string, _col4: string, _col7: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST
+ partition by: _col4
+ raw input shape:
+ window functions:
+ window function definition
+ alias: first_value_window_2
+ arguments: _col7
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: string), _col3 (type: string), _col7 (type: int), _col0 (type: bigint), _col1 (type: bigint), first_value_window_2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 6318 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 6318 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name ) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size c ca fvw1
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 2
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 17
+Manufacturer#3 almond antique misty red olive 1 4 4 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: min_window_1
+ arguments: _col7
+ name: min
+ window function: GenericUDAFMinEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: max_window_2
+ arguments: _col7
+ name: max
+ window function: GenericUDAFMaxEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: avg_window_3
+ arguments: _col7
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDouble
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double), min_window_1 (type: double), max_window_2 (type: double), round(avg_window_3, 2) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 26 Data size: 6630 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 6630 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) over w1 as mi,
+max(p_retailprice) over w1 as ma,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s mi ma ag
+Manufacturer#1 almond antique burnished rose metallic 2 4100.06 1173.15 1753.76 1366.69
+Manufacturer#1 almond antique burnished rose metallic 2 5702.65 1173.15 1753.76 1425.66
+Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.07 1173.15 1753.76 1423.41
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 7576.58 1173.15 1753.76 1515.32
+Manufacturer#1 almond aquamarine burnished black steel 28 6403.43 1414.42 1753.76 1600.86
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 4649.67 1414.42 1632.66 1549.89
+Manufacturer#2 almond antique violet chocolate turquoise 14 5523.36 1690.68 2031.98 1841.12
+Manufacturer#2 almond antique violet turquoise frosted 40 7222.02 1690.68 2031.98 1805.51
+Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 1690.68 2031.98 1784.72
+Manufacturer#2 almond aquamarine rose maroon antique 25 7232.94 1698.66 2031.98 1808.24
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5432.24 1698.66 2031.98 1810.75
+Manufacturer#3 almond antique chartreuse khaki white 17 4272.34 1190.27 1671.68 1424.11
+Manufacturer#3 almond antique forest lavender goldenrod 14 6195.32 1190.27 1922.98 1548.83
+Manufacturer#3 almond antique metallic orange dim 19 7532.61 1190.27 1922.98 1506.52
+Manufacturer#3 almond antique misty red olive 1 5860.93 1190.27 1922.98 1465.23
+Manufacturer#3 almond antique olive coral navajo 45 4670.66 1337.29 1922.98 1556.89
+Manufacturer#4 almond antique gainsboro frosted violet 10 4202.35 1206.26 1620.67 1400.78
+Manufacturer#4 almond antique violet mint lemon 39 6047.27 1206.26 1844.92 1511.82
+Manufacturer#4 almond aquamarine floral ivory bisque 27 7337.62 1206.26 1844.92 1467.52
+Manufacturer#4 almond aquamarine yellow dodger mint 7 5716.95 1206.26 1844.92 1429.24
+Manufacturer#4 almond azure aquamarine papaya violet 12 4341.53 1206.26 1844.92 1447.18
+Manufacturer#5 almond antique blue firebrick mint 31 5190.08 1611.66 1789.69 1730.03
+Manufacturer#5 almond antique medium spring khaki 6 6208.18 1018.1 1789.69 1552.05
+Manufacturer#5 almond antique sky peru orange 2 7672.66 1018.1 1789.69 1534.53
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 5882.97 1018.1 1788.73 1470.74
+Manufacturer#5 almond azure blanched chiffon midnight 23 4271.31 1018.1 1788.73 1423.77
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size, p_retailprice,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) as mi ,
+max(p_retailprice) as ma ,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size, p_retailprice,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) as mi ,
+max(p_retailprice) as ma ,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double)
+ outputColumnNames: p_name, p_mfgr, p_size, p_retailprice
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2, 5, 7]
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_retailprice), max(p_retailprice)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDouble(col 7) -> double, VectorUDAFMaxDouble(col 7) -> double
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 1, col 2, col 5, col 7
+ native: false
+ projectedOutputColumns: [0, 1]
+ keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col4 (type: double), _col5 (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaaa
+ reduceColumnSortOrder: ++++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int, KEY._col3:double, VALUE._col0:double, VALUE._col1:double
+ partitionColumnCount: 0
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDouble(col 4) -> double, VectorUDAFMaxDouble(col 5) -> double
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1, col 2, col 3
+ native: false
+ projectedOutputColumns: [0, 1]
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2, 3, 4, 5]
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double)
+ Reducer 3
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: int, _col3: double, _col4: double, _col5: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col3
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: avg_window_1
+ arguments: _col3
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDouble
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), round(sum_window_0, 2) (type: double), _col4 (type: double), _col5 (type: double), round(avg_window_1, 2) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 13 Data size: 3419 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 3419 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size, p_retailprice,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) as mi ,
+max(p_retailprice) as ma ,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size, p_retailprice,
+round(sum(p_retailprice) over w1,2) as s,
+min(p_retailprice) as mi ,
+max(p_retailprice) as ma ,
+round(avg(p_retailprice) over w1,2) as ag
+from part
+group by p_mfgr,p_name, p_size, p_retailprice
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size p_retailprice s mi ma ag
+Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.83
+Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.32
+Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.86
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89
+Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.36 1690.68 1690.68 1841.12
+Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.51
+Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.72
+Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.94 1698.66 1698.66 1808.24
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.24 1701.6 1701.6 1810.75
+Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.11
+Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83
+Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.52
+Manufacturer#3 almond antique misty red olive 1 1922.98 5860.93 1922.98 1922.98 1465.23
+Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.89
+Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.78
+Manufacturer#4 almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.82
+Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.62 1206.26 1206.26 1467.52
+Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.95 1844.92 1844.92 1429.24
+Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.53 1290.35 1290.35 1447.18
+Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.03
+Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.05
+Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.53
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.97 1018.1 1018.1 1470.74
+Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.31 1464.48 1464.48 1423.77
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+stddev(p_retailprice) over w1 as sdev,
+stddev_pop(p_retailprice) over w1 as sdev_pop,
+collect_set(p_size) over w1 as uniq_size,
+variance(p_retailprice) over w1 as var,
+round(corr(p_size, p_retailprice) over w1,5) as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+stddev(p_retailprice) over w1 as sdev,
+stddev_pop(p_retailprice) over w1 as sdev_pop,
+collect_set(p_size) over w1 as uniq_size,
+variance(p_retailprice) over w1 as var,
+round(corr(p_size, p_retailprice) over w1,5) as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF Output Columns expression for PTF operator: Data type array<int> of column collect_set_window_2 not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: stddev_window_0
+ arguments: _col7
+ name: stddev
+ window function: GenericUDAFStdEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: stddev_pop_window_1
+ arguments: _col7
+ name: stddev_pop
+ window function: GenericUDAFStdEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: collect_set_window_2
+ arguments: _col5
+ name: collect_set
+ window function: GenericUDAFMkCollectionEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: variance_window_3
+ arguments: _col7
+ name: variance
+ window function: GenericUDAFVarianceEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: corr_window_4
+ arguments: _col5, _col7
+ name: corr
+ window function: GenericUDAFCorrelationEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: covar_pop_window_5
+ arguments: _col5, _col7
+ name: covar_pop
+ window function: GenericUDAFCovarianceEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), stddev_window_0 (type: double), stddev_pop_window_1 (type: double), collect_set_window_2 (type: array<int>), variance_window_3 (type: double), round(corr_window_4, 5) (type: double), covar_pop_window_5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 9958 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 9958 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+stddev(p_retailprice) over w1 as sdev,
+stddev_pop(p_retailprice) over w1 as sdev_pop,
+collect_set(p_size) over w1 as uniq_size,
+variance(p_retailprice) over w1 as var,
+round(corr(p_size, p_retailprice) over w1,5) as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+stddev(p_retailprice) over w1 as sdev,
+stddev_pop(p_retailprice) over w1 as sdev_pop,
+collect_set(p_size) over w1 as uniq_size,
+variance(p_retailprice) over w1 as var,
+round(corr(p_size, p_retailprice) over w1,5) as cor,
+covar_pop(p_size, p_retailprice) over w1 as covarp
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size sdev sdev_pop uniq_size var cor covarp
+Manufacturer#1 almond antique burnished rose metallic 2 258.10677784349235 258.10677784349235 [2,34,6] 66619.10876874991 0.81133 2801.7074999999995
+Manufacturer#1 almond antique burnished rose metallic 2 273.70217881648074 273.70217881648074 [2,34] 74912.8826888888 1.0 4128.782222222221
+Manufacturer#1 almond antique chartreuse lavender yellow 34 230.90151585470358 230.90151585470358 [2,34,6,28] 53315.51002399992 0.69564 2210.7864
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 202.73109328368946 202.73109328368946 [2,34,6,28,42] 41099.896184 0.63079 2009.9536000000007
+Manufacturer#1 almond aquamarine burnished black steel 28 121.6064517973862 121.6064517973862 [34,6,28,42] 14788.129118750014 0.20367 331.1337500000004
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 96.5751586416853 96.5751586416853 [6,28,42] 9326.761266666683 -1.4E-4 -0.20666666666708502
+Manufacturer#2 almond antique violet chocolate turquoise 14 142.2363169751898 142.2363169751898 [14,40,2] 20231.169866666663 -0.4937 -1113.7466666666658
+Manufacturer#2 almond antique violet turquoise frosted 40 137.76306498840682 137.76306498840682 [14,40,2,25] 18978.662075 -0.52056 -1004.4812499999995
+Manufacturer#2 almond aquamarine midnight light salmon 2 130.03972279269132 130.03972279269132 [14,40,2,25,18] 16910.329504000005 -0.46909 -766.1791999999995
+Manufacturer#2 almond aquamarine rose maroon antique 25 135.55100986344584 135.55100986344584 [40,2,25,18] 18374.07627499999 -0.60914 -1128.1787499999987
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 156.44019460768044 156.44019460768044 [2,25,18] 24473.534488888927 -0.95717 -1441.4466666666676
+Manufacturer#3 almond antique chartreuse khaki white 17 196.7742266885805 196.7742266885805 [17,14,19] 38720.09628888887 0.55572 224.6944444444446
+Manufacturer#3 almond antique forest lavender goldenrod 14 275.14144189852607 275.14144189852607 [17,14,19,1] 75702.81305 -0.67208 -1296.9000000000003
+Manufacturer#3 almond antique metallic orange dim 19 260.23473614412046 260.23473614412046 [17,14,19,1,45] 67722.117896 -0.57035 -2129.0664
+Manufacturer#3 almond antique misty red olive 1 275.9139962356932 275.9139962356932 [14,19,1,45] 76128.53331875012 -0.57748 -2547.7868749999993
+Manufacturer#3 almond antique olive coral navajo 45 260.5815918713796 260.5815918713796 [19,1,45] 67902.76602222225 -0.87107 -4099.731111111111
+Manufacturer#4 almond antique gainsboro frosted violet 10 170.13011889596618 170.13011889596618 [10,39,27] 28944.25735555559 -0.6657 -1347.4777777777779
+Manufacturer#4 almond antique violet mint lemon 39 242.26834609323197 242.26834609323197 [10,39,27,7] 58693.95151875002 -0.80519 -2537.328125
+Manufacturer#4 almond aquamarine floral ivory bisque 27 234.10001662537326 234.10001662537326 [10,39,27,7,12] 54802.817784000035 -0.60469 -1719.8079999999995
+Manufacturer#4 almond aquamarine yellow dodger mint 7 247.3342714197732 247.3342714197732 [39,27,7,12] 61174.24181875003 -0.55087 -1719.0368749999975
+Manufacturer#4 almond azure aquamarine papaya violet 12 283.3344330566893 283.3344330566893 [27,7,12] 80278.40095555557 -0.77557 -1867.4888888888881
+Manufacturer#5 almond antique blue firebrick mint 31 83.69879024746363 83.69879024746363 [31,6,2] 7005.487488888913 0.39004 418.9233333333353
+Manufacturer#5 almond antique medium spring khaki 6 316.68049612345885 316.68049612345885 [31,6,2,46] 100286.53662500004 -0.71361 -4090.853749999999
+Manufacturer#5 almond antique sky peru orange 2 285.40506298242155 285.40506298242155 [31,6,2,46,23] 81456.04997600002 -0.71286 -3297.2011999999986
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 285.43749038756283 285.43749038756283 [6,2,46,23] 81474.56091875004 -0.98413 -4871.028125000002
+Manufacturer#5 almond azure blanched chiffon midnight 23 315.9225931564038 315.9225931564038 [2,46,23] 99807.08486666664 -0.99789 -5664.856666666666
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+histogram_numeric(p_retailprice, 5) over w1 as hist,
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+histogram_numeric(p_retailprice, 5) over w1 as hist,
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF Output Columns expression for PTF operator: Data type array<struct<x:double,y:double>> of column histogram_numeric_window_0 not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: histogram_numeric_window_0
+ arguments: _col7, 5
+ name: histogram_numeric
+ window function: GenericUDAFHistogramNumericEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: percentile_window_1
+ arguments: _col0, 0.5
+ name: percentile
+ window function: GenericUDAFBridgeEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: row_number_window_2
+ name: row_number
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), histogram_numeric_window_0 (type: array<struct<x:double,y:double>>), percentile_window_1 (type: double), row_number_window_2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 24830 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 24830 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+histogram_numeric(p_retailprice, 5) over w1 as hist,
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+histogram_numeric(p_retailprice, 5) over w1 as hist,
+percentile(p_partkey, 0.5) over w1 as per,
+row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size hist per rn
+Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 115872.0 2
+Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1753.76,"y":1.0}] 121152.0 1
+Manufacturer#1 almond antique chartreuse lavender yellow 34 [{"x":1173.15,"y":2.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 110592.0 3
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 [{"x":1173.15,"y":1.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86428.0 4
+Manufacturer#1 almond aquamarine burnished black steel 28 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86098.0 5
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0}] 86428.0 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 [{"x":1690.68,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 1
+Manufacturer#2 almond antique violet turquoise frosted 40 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 139825.5 2
+Manufacturer#2 almond aquamarine midnight light salmon 2 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 3
+Manufacturer#2 almond aquamarine rose maroon antique 25 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 169347.0 4
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 5
+Manufacturer#3 almond antique chartreuse khaki white 17 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0}] 90681.0 1
+Manufacturer#3 almond antique forest lavender goldenrod 14 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 65831.5 2
+Manufacturer#3 almond antique metallic orange dim 19 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 90681.0 3
+Manufacturer#3 almond antique misty red olive 1 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 76690.0 4
+Manufacturer#3 almond antique olive coral navajo 45 [{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 112398.0 5
+Manufacturer#4 almond antique gainsboro frosted violet 10 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0}] 48427.0 1
+Manufacturer#4 almond antique violet mint lemon 39 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 46844.0 2
+Manufacturer#4 almond aquamarine floral ivory bisque 27 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 45261.0 3
+Manufacturer#4 almond aquamarine yellow dodger mint 7 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1844.92,"y":1.0}] 39309.0 4
+Manufacturer#4 almond azure aquamarine papaya violet 12 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1844.92,"y":1.0}] 33357.0 5
+Manufacturer#5 almond antique blue firebrick mint 31 [{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 155733.0 1
+Manufacturer#5 almond antique medium spring khaki 6 [{"x":1018.1,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 99201.0 2
+Manufacturer#5 almond antique sky peru orange 2 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 78486.0 3
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0}] 60577.5 4
+Manufacturer#5 almond azure blanched chiffon midnight 23 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1788.73,"y":1.0}] 78486.0 5
+PREHOOK: query: explain vectorization detail
+create view IF NOT EXISTS mfgr_price_view as
+select p_mfgr, p_brand,
+round(sum(p_retailprice),2) as s
+from part
+group by p_mfgr, p_brand
+PREHOOK: type: CREATEVIEW
+POSTHOOK: query: explain vectorization detail
+create view IF NOT EXISTS mfgr_price_view as
+select p_mfgr, p_brand,
+round(sum(p_retailprice),2) as s
+from part
+group by p_mfgr, p_brand
+POSTHOOK: type: CREATEVIEW
+Explain
+PLAN VECTORIZATION:
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Create View Operator: + Create View + if not exists: true + or replace: false + columns: p_mfgr string, p_brand string, s double + expanded text: select `part`.`p_mfgr`, `part`.`p_brand`, +round(sum(`part`.`p_retailprice`),2) as `s` +from `default`.`part` +group by `part`.`p_mfgr`, `part`.`p_brand` + name: default.mfgr_price_view + original text: select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand + rewrite enabled: false + +PREHOOK: query: create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@mfgr_price_view +POSTHOOK: query: create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mfgr_price_view +POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_price_view.s EXPRESSION [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +p_mfgr p_brand s +PREHOOK: query: explain vectorization detail +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + properties: + insideView TRUE + Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 7] + Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 3 + native: false + projectedOutputColumns: [0] + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: 
Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:double + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 2) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: round(_col2, 2) + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum] + functionInputExpressions: [RoundWithNumDigitsDoubleToDouble(col 2, decimalPlaces 2) -> 4:double] + functionNames: [sum] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, string, string, double] + streamingColumns: [] + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), round(_col2, 2) (type: double), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + 
native: true + projectedOutputColumns: [0, 1, 5, 6] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 2, decimalPlaces 2) -> 5:double, RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2) -> 6:double + Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double), _col3 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double, VALUE._col1:double + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +PREHOOK: type: QUERY +PREHOOK: Input: default@mfgr_price_view +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mfgr_price_view +POSTHOOK: Input: default@part +#### A masked pattern was here #### +sq.p_mfgr sq.p_brand sq.s sq.s1 +Manufacturer#1 Brand#12 4800.84 8749.73 +Manufacturer#1 Brand#14 2346.3 8749.73 +Manufacturer#1 Brand#15 1602.59 8749.73 +Manufacturer#2 Brand#22 3491.38 8923.62 +Manufacturer#2 Brand#23 2031.98 8923.62 +Manufacturer#2 Brand#24 1698.66 8923.62 +Manufacturer#2 Brand#25 1701.6 8923.62 +Manufacturer#3 Brand#31 1671.68 7532.61 +Manufacturer#3 Brand#32 3333.37 7532.61 +Manufacturer#3 Brand#34 1337.29 7532.61 +Manufacturer#3 Brand#35 1190.27 7532.61 +Manufacturer#4 Brand#41 4755.94 7337.62 
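(Editorial note, not part of the golden file: the repeated s1 value per manufacturer in these rows follows from the plan above. The frame is RANGE PRECEDING(MAX)~CURRENT and the ORDER BY key equals the PARTITION BY key, so every row in a partition is a peer of the current row and the frame covers the whole partition. A minimal Java sketch of that degenerate case; the class and names below are illustrative only, not Hive's VectorPTFEvaluatorDoubleSum.)

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Streams (partitionKey, value) pairs that arrive sorted by partition key and
    // emits the partition-wide sum for every row -- what a RANGE UNBOUNDED
    // PRECEDING .. CURRENT ROW sum degenerates to when the ORDER BY key equals
    // the PARTITION BY key (all rows in the partition are peers).
    public class PartitionTotalSumSketch {
        public static void main(String[] args) {
            String[][] rows = {
                {"Manufacturer#1", "4800.84"}, {"Manufacturer#1", "2346.3"},
                {"Manufacturer#1", "1602.59"}, {"Manufacturer#2", "3491.38"},
            };
            // First pass: accumulate per-partition totals.
            Map<String, Double> totals = new LinkedHashMap<>();
            for (String[] r : rows) {
                totals.merge(r[0], Double.parseDouble(r[1]), Double::sum);
            }
            // Second pass: every row of a partition reports the same total,
            // matching the repeated s1 values in the result rows above
            // (8749.73 for every Manufacturer#1 brand).
            for (String[] r : rows) {
                System.out.printf("%s s=%s s1=%.2f%n", r[0], r[1], totals.get(r[0]));
            }
        }
    }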
+Manufacturer#4 Brand#42 2581.68 7337.62 +Manufacturer#5 Brand#51 1611.66 7672.66 +Manufacturer#5 Brand#52 3254.17 7672.66 +Manufacturer#5 Brand#53 2806.83 7672.66 +PREHOOK: query: select p_mfgr, p_brand, s, +round(sum(s) over w1 ,2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@mfgr_price_view +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_brand, s, +round(sum(s) over w1 ,2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mfgr_price_view +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_brand s s1 +Manufacturer#1 Brand#12 4800.84 4800.84 +Manufacturer#1 Brand#14 2346.3 7147.14 +Manufacturer#1 Brand#15 1602.59 8749.73 +Manufacturer#2 Brand#22 3491.38 3491.38 +Manufacturer#2 Brand#23 2031.98 5523.36 +Manufacturer#2 Brand#24 1698.66 7222.02 +Manufacturer#2 Brand#25 1701.6 5432.24 +Manufacturer#3 Brand#31 1671.68 1671.68 +Manufacturer#3 Brand#32 3333.37 5005.05 +Manufacturer#3 Brand#34 1337.29 6342.34 +Manufacturer#3 Brand#35 1190.27 5860.93 +Manufacturer#4 Brand#41 4755.94 4755.94 +Manufacturer#4 Brand#42 2581.68 7337.62 +Manufacturer#5 Brand#51 1611.66 1611.66 +Manufacturer#5 Brand#52 3254.17 4865.83 +Manufacturer#5 Brand#53 2806.83 7672.66 +PREHOOK: query: explain vectorization detail +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +PREHOOK: type: CREATEVIEW +POSTHOOK: query: explain vectorization detail +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +POSTHOOK: type: CREATEVIEW +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Create View Operator: + Create View + if not exists: true + or replace: false + columns: p_mfgr string, p_brand string, s double + expanded text: select `part`.`p_mfgr`, `part`.`p_brand`, +round(sum(`part`.`p_retailprice`) over w1,2) as `s` +from `default`.`part` +window w1 as (distribute by `part`.`p_mfgr` sort by `part`.`p_name` rows between 2 preceding and current row) + name: default.mfgr_brand_price_view + original text: select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) + rewrite enabled: false + +PREHOOK: query: create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@mfgr_brand_price_view +POSTHOOK: query: create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +POSTHOOK: type: CREATEVIEW 
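(Editorial note, not part of the golden file: the s1 column in the ROWS BETWEEN 2 PRECEDING AND CURRENT ROW results above -- for Manufacturer#1: 4800.84, 7147.14, 8749.73 -- is a sliding sum over at most three rows. This bounded-start frame is exactly the shape the vectorizer rejects elsewhere in this file with "sum only UNBOUNDED start frame is supported". A self-contained Java sketch under those assumptions, with illustrative names only:)

    import java.util.ArrayDeque;
    import java.util.Deque;

    // A ROWS BETWEEN 2 PRECEDING AND CURRENT ROW sum over one ordered partition,
    // maintained incrementally with a small ring of the last three values.
    public class BoundedRowsFrameSumSketch {
        public static void main(String[] args) {
            double[] partition = {4800.84, 2346.3, 1602.59}; // Manufacturer#1 s values
            Deque<Double> frame = new ArrayDeque<>();
            double running = 0.0;
            for (double v : partition) {
                frame.addLast(v);
                running += v;
                if (frame.size() > 3) {             // frame holds 2 preceding + current
                    running -= frame.removeFirst(); // evict the row that left the frame
                }
                System.out.printf("s=%s s1=%.2f%n", v, running);
            }
        }
    }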
+POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mfgr_brand_price_view +POSTHOOK: Lineage: mfgr_brand_price_view.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_brand_price_view.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_brand_price_view.s SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +p_mfgr p_brand s +PREHOOK: query: explain vectorization detail +select * from mfgr_brand_price_view +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * from mfgr_brand_price_view +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + properties: + insideView TRUE + Statistics: Num rows: 26 Data size: 8294 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 8294 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_brand (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), 
VALUE._col1 (type: string), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col3, _col7 + Statistics: Num rows: 26 Data size: 15262 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col3: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(2)~CURRENT + Statistics: Num rows: 26 Data size: 15262 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col3 (type: string), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from mfgr_brand_price_view +PREHOOK: type: QUERY +PREHOOK: Input: default@mfgr_brand_price_view +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from mfgr_brand_price_view +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mfgr_brand_price_view +POSTHOOK: Input: default@part +#### A masked pattern was here #### +mfgr_brand_price_view.p_mfgr mfgr_brand_price_view.p_brand mfgr_brand_price_view.s +Manufacturer#1 Brand#12 4100.06 +Manufacturer#1 Brand#12 4649.67 +Manufacturer#1 Brand#12 4770.77 +Manufacturer#1 Brand#14 1173.15 +Manufacturer#1 Brand#14 2346.3 +Manufacturer#1 Brand#15 4529.5 +Manufacturer#2 Brand#22 1690.68 +Manufacturer#2 Brand#22 3491.38 +Manufacturer#2 Brand#23 5523.36 +Manufacturer#2 Brand#24 5531.34 +Manufacturer#2 Brand#25 5432.24 +Manufacturer#3 Brand#31 1671.68 +Manufacturer#3 Brand#32 4272.34 +Manufacturer#3 Brand#32 4523.64 +Manufacturer#3 Brand#34 4670.66 +Manufacturer#3 Brand#35 2861.95 +Manufacturer#4 Brand#41 1620.67 +Manufacturer#4 Brand#41 4341.53 +Manufacturer#4 Brand#41 4426.6 +Manufacturer#4 Brand#42 2996.09 +Manufacturer#4 Brand#42 4202.35 +Manufacturer#5 Brand#51 3401.35 +Manufacturer#5 Brand#52 1789.69 +Manufacturer#5 Brand#52 4271.31 +Manufacturer#5 Brand#53 4418.49 +Manufacturer#5 Brand#53 5190.08 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int), array(1,2,3) (type: array<int>) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 7254 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward + Statistics: Num rows: 26 Data size: 7254 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 8710 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 52 Data size: 10166 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 52 Data size: 10166 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Select Operator + expressions: _col3 (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 26 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 52 Data size: 10166 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 52 Data size: 10166 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type array<int> of GenericUDFArray(Const int 1, Const int 2, Const int 3) not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 52 Data size: 13780 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col4: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC 
NULLS FIRST, _col4 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~CURRENT + Statistics: Num rows: 52 Data size: 13780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 52 Data size: 14196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 52 Data size: 14196 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name lv_col p_size s +Manufacturer#1 almond antique burnished rose metallic 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 2 4 +Manufacturer#1 almond antique burnished rose metallic 2 2 6 +Manufacturer#1 almond antique burnished rose metallic 2 2 6 +Manufacturer#1 almond antique burnished rose metallic 3 2 6 +Manufacturer#1 almond antique burnished rose metallic 3 2 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1 34 90 +Manufacturer#1 almond antique chartreuse lavender yellow 2 34 96 +Manufacturer#1 almond antique chartreuse lavender yellow 3 34 102 +Manufacturer#1 almond antique salmon chartreuse burlywood 1 6 10 +Manufacturer#1 almond antique salmon chartreuse burlywood 2 6 14 +Manufacturer#1 almond antique salmon chartreuse burlywood 3 6 18 +Manufacturer#1 almond aquamarine burnished black steel 1 28 40 +Manufacturer#1 almond aquamarine burnished black steel 2 28 62 +Manufacturer#1 almond aquamarine burnished black steel 3 28 84 +Manufacturer#1 almond aquamarine pink moccasin thistle 1 42 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 2 42 118 +Manufacturer#1 almond aquamarine pink moccasin thistle 3 42 126 +Manufacturer#2 almond antique violet chocolate turquoise 1 14 18 +Manufacturer#2 almond antique violet chocolate turquoise 2 14 30 +Manufacturer#2 almond antique violet chocolate turquoise 3 14 42 +Manufacturer#2 almond antique violet turquoise frosted 1 40 90 +Manufacturer#2 almond antique violet turquoise frosted 2 40 105 +Manufacturer#2 almond antique violet turquoise frosted 3 40 120 +Manufacturer#2 almond aquamarine midnight light salmon 1 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 4 +Manufacturer#2 almond 
aquamarine midnight light salmon 3 2 6 +Manufacturer#2 almond aquamarine rose maroon antique 1 25 61 +Manufacturer#2 almond aquamarine rose maroon antique 2 25 68 +Manufacturer#2 almond aquamarine rose maroon antique 3 25 75 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 18 46 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 2 18 50 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 3 18 54 +Manufacturer#3 almond antique chartreuse khaki white 1 17 45 +Manufacturer#3 almond antique chartreuse khaki white 2 17 48 +Manufacturer#3 almond antique chartreuse khaki white 3 17 51 +Manufacturer#3 almond antique forest lavender goldenrod 1 14 16 +Manufacturer#3 almond antique forest lavender goldenrod 2 14 29 +Manufacturer#3 almond antique forest lavender goldenrod 3 14 42 +Manufacturer#3 almond antique metallic orange dim 1 19 53 +Manufacturer#3 almond antique metallic orange dim 2 19 55 +Manufacturer#3 almond antique metallic orange dim 3 19 57 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique misty red olive 2 1 2 +Manufacturer#3 almond antique misty red olive 3 1 3 +Manufacturer#3 almond antique olive coral navajo 1 45 83 +Manufacturer#3 almond antique olive coral navajo 2 45 109 +Manufacturer#3 almond antique olive coral navajo 3 45 135 +Manufacturer#4 almond antique gainsboro frosted violet 1 10 24 +Manufacturer#4 almond antique gainsboro frosted violet 2 10 27 +Manufacturer#4 almond antique gainsboro frosted violet 3 10 30 +Manufacturer#4 almond antique violet mint lemon 1 39 93 +Manufacturer#4 almond antique violet mint lemon 2 39 105 +Manufacturer#4 almond antique violet mint lemon 3 39 117 +Manufacturer#4 almond aquamarine floral ivory bisque 1 27 51 +Manufacturer#4 almond aquamarine floral ivory bisque 2 27 66 +Manufacturer#4 almond aquamarine floral ivory bisque 3 27 81 +Manufacturer#4 almond aquamarine yellow dodger mint 1 7 7 +Manufacturer#4 almond aquamarine yellow dodger mint 2 7 14 +Manufacturer#4 almond aquamarine yellow dodger mint 3 7 21 +Manufacturer#4 almond azure aquamarine papaya violet 1 12 32 +Manufacturer#4 almond azure aquamarine papaya violet 2 12 34 +Manufacturer#4 almond azure aquamarine papaya violet 3 12 36 +Manufacturer#5 almond antique blue firebrick mint 1 31 77 +Manufacturer#5 almond antique blue firebrick mint 2 31 85 +Manufacturer#5 almond antique blue firebrick mint 3 31 93 +Manufacturer#5 almond antique medium spring khaki 1 6 10 +Manufacturer#5 almond antique medium spring khaki 2 6 14 +Manufacturer#5 almond antique medium spring khaki 3 6 18 +Manufacturer#5 almond antique sky peru orange 1 2 2 +Manufacturer#5 almond antique sky peru orange 2 2 4 +Manufacturer#5 almond antique sky peru orange 3 2 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1 46 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 2 46 123 +Manufacturer#5 almond aquamarine dodger light gainsboro 3 46 138 +Manufacturer#5 almond azure blanched chiffon midnight 1 23 35 +Manufacturer#5 almond azure blanched chiffon midnight 2 23 52 +Manufacturer#5 almond azure blanched chiffon midnight 3 23 69 +PREHOOK: query: CREATE TABLE part_1( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_1 +POSTHOOK: query: CREATE TABLE part_1( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_1 +PREHOOK: query: 
CREATE TABLE part_2( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +cud INT, +s2 DOUBLE, +fv1 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_2 +POSTHOOK: query: CREATE TABLE part_2( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +cud INT, +s2 DOUBLE, +fv1 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_2 +PREHOOK: query: CREATE TABLE part_3( +p_mfgr STRING, +p_name STRING, +p_size INT, +c INT, +ca INT, +fv INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_3 +POSTHOOK: query: CREATE TABLE part_3( +p_mfgr STRING, +p_name STRING, +p_size INT, +c INT, +ca INT, +fv INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_3 +PREHOOK: query: explain vectorization detail +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 
depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: 
false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_1 + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: cume_dist_window_2 + arguments: _col1 + name: cume_dist + window 
function: GenericUDAFCumeDistEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double), VALUE._col4 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: double, _col4: string, _col5: string, _col8: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col8 ASC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: sum_window_3 + arguments: _col8 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(5)~CURRENT + Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_3 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col4 (type: string), _col5 (type: string), _col8 (type: int) + outputColumnNames: sum_window_3, _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: sum_window_3 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col8 (type: int) + Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: first_value only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: double), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col7 (type: int) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9 + Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col1: int, _col2: int, _col3: double, _col5: string, _col6: string, _col9: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col6 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col6 + raw input shape: + window functions: + window function definition + alias: first_value_window_4 + arguments: _col9 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col6 (type: string), _col5 (type: string), _col9 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int), UDFToDouble(round(_col0, 1)) (type: double), first_value_window_4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_2 + Reducer 6 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: count_window_0 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + isStar: true + window function definition + alias: count_window_1 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorCountStar, VectorPTFEvaluatorCount] + functionInputExpressions: [null, col 2] + functionNames: [count, count] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 
1, 0, 2] + outputTypes: [bigint, bigint, string, string, int] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: count_window_0, count_window_1, _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 4, 1, 0, 2] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) + Reducer 7 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: first_value only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col1: bigint, _col3: string, _col4: string, _col7: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST + partition by: _col4 + raw input shape: + window functions: + window function definition + alias: first_value_window_2 + arguments: _col7 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 13182 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: string), _col3 (type: string), _col7 (type: int), UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), first_value_window_2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_3 + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.part_1
+
+  Stage: Stage-5
+    Stats-Aggr Operator
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.part_2
+
+  Stage: Stage-6
+    Stats-Aggr Operator
+
+  Stage: Stage-2
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.part_3
+
+  Stage: Stage-7
+    Stats-Aggr Operator
+
+PREHOOK: query: from part
+INSERT OVERWRITE TABLE part_1
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name ) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name ) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
+INSERT OVERWRITE TABLE part_2
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
+first_value(p_size) over w1 as fv1
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+INSERT OVERWRITE TABLE part_3
+select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fv
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Output: default@part_1
+PREHOOK: Output: default@part_2
+PREHOOK: Output: default@part_3
+POSTHOOK: query: from part
+INSERT OVERWRITE TABLE part_1
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name ) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name ) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s
+INSERT OVERWRITE TABLE part_2
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2,
+first_value(p_size) over w1 as fv1
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+INSERT OVERWRITE TABLE part_3
+select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fv
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Output: default@part_1
+POSTHOOK: Output: default@part_2
+POSTHOOK: Output: default@part_3
+POSTHOOK: Lineage: part_1.dr SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_1.r SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.s SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.cud SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.dr SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.fv1 SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_2.r SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.s2 SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.c SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.ca SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.fv SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+_col0 _col1 _col2 _col3 _col4 _col5
+PREHOOK: query: select * from part_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_1
+#### A masked pattern was here ####
+part_1.p_mfgr part_1.p_name part_1.p_size part_1.r part_1.dr part_1.s
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
+Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
+Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
+Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
+Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
+Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
+Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
+PREHOOK: query: select * from part_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_2
+#### A masked pattern was here ####
+part_2.p_mfgr part_2.p_name part_2.p_size part_2.r part_2.dr part_2.cud part_2.s2 part_2.fv1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0 4.0 2
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0 4.0 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0 34.0 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0 10.0 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0 28.0 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1 42.0 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0 14.0 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0 40.0 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0 2.0 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0 25.0 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1 32.0 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0 31.0 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0 14.0 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 0 50.0 17
+Manufacturer#3 almond antique misty red olive 1 4 4 0 1.0 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 1 45.0 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0 17.0 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 0 39.0 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0 27.0 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0 7.0 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1 29.0 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 0 31.0 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 0 8.0 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 0 2.0 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0 46.0 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1 23.0 2
+PREHOOK: query: select * from part_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_3
+#### A masked pattern was here ####
+part_3.p_mfgr part_3.p_name part_3.p_size part_3.c part_3.ca part_3.fv
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 2
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 17
+Manufacturer#3 almond antique misty red olive 1 4 4 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2
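Note: inside an OVER clause Hive accepts distribute by / sort by as synonyms for partition by / order by, so the running sum that produced part_1.s above can be written in the more familiar form below. This is an illustrative rewrite against the same part table, not part of the checked-in test:

    select p_mfgr, p_name, p_size,
           round(sum(p_retailprice) over
                 (partition by p_mfgr order by p_name
                  rows between unbounded preceding and current row), 2) as s
    from part;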
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColGreaterLongScalar(col 5, val 0) -> boolean
+                    predicate: (p_size > 0) (type: boolean)
+                    Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: min(p_retailprice)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinDouble(col 7) -> double
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          keyExpressions: col 2, col 1, col 5
+                          native: false
+                          projectedOutputColumns: [0]
+                      keys: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col3 (type: double)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5, 7]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+                vectorized: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: string, _col1: string, _col2: int, _col3: double
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: rank_window_0
+                              arguments: _col1
+                              name: rank
+                              window function: GenericUDAFRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                            window function definition
+                              alias: dense_rank_window_1
+                              arguments: _col1
+                              name: dense_rank
+                              window function: GenericUDAFDenseRankEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                            window function definition
+                              alias: lag_window_2
+                              arguments: _col2, 1, _col2
+                              name: lag
+                              window function: GenericUDAFLagEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                    Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size mi r dr p_size deltasz
+Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
+Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
+Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
+Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
+Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
+Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
+Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
+Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
+Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
+Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
+Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
+Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
+Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
+Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
+Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
+Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
+Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
+Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
+Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
+Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
+Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
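The deltaSz column exercises the three-argument form of lag: lag(p_size, 1, p_size) yields the previous row's p_size within the partition and falls back to the current row's own p_size on the partition's first row, which is why every manufacturer starts at deltaSz 0 (Manufacturer#3: 17 - 17 = 0, then 14 - 17 = -3). Because lag is absent from the supported-function list in the plan's notVectorizedReason, this reducer runs in row mode. A minimal sketch of the same pattern, assuming the same part table:

    select p_mfgr, p_name, p_size,
           p_size - lag(p_size, 1, p_size)
                    over (partition by p_mfgr order by p_name) as deltaSz
    from part;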
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1
+from part
+window w1 as (rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1
+from part
+window w1 as (rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_size (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_name (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col5 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: RANGE PRECEDING(10)~CURRENT
+                            window function definition
+                              alias: sum_window_1
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: RANGE CURRENT~FOLLOWING(10)
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1
+from part
+window w1 as (rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1
+from part
+window w1 as (rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s2 s1
+Manufacturer#1 almond antique burnished rose metallic 2 4 10
+Manufacturer#1 almond antique burnished rose metallic 2 4 10
+Manufacturer#1 almond antique chartreuse lavender yellow 34 62 76
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 10 6
+Manufacturer#1 almond aquamarine burnished black steel 28 28 62
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42
+Manufacturer#2 almond antique violet chocolate turquoise 14 14 32
+Manufacturer#2 almond antique violet turquoise frosted 40 40 40
+Manufacturer#2 almond aquamarine midnight light salmon 2 2 2
+Manufacturer#2 almond aquamarine rose maroon antique 25 43 25
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 43
+Manufacturer#3 almond antique chartreuse khaki white 17 31 36
+Manufacturer#3 almond antique forest lavender goldenrod 14 14 50
+Manufacturer#3 almond antique metallic orange dim 19 50 19
+Manufacturer#3 almond antique misty red olive 1 1 1
+Manufacturer#3 almond antique olive coral navajo 45 45 45
+Manufacturer#4 almond antique gainsboro frosted violet 10 17 22
+Manufacturer#4 almond antique violet mint lemon 39 39 39
+Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27
+Manufacturer#4 almond aquamarine yellow dodger mint 7 7 29
+Manufacturer#4 almond azure aquamarine papaya violet 12 29 12
+Manufacturer#5 almond antique blue firebrick mint 31 54 31
+Manufacturer#5 almond antique medium spring khaki 6 8 6
+Manufacturer#5 almond antique sky peru orange 2 2 8
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 46
+Manufacturer#5 almond azure blanched chiffon midnight 23 23 54
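A RANGE frame is bounded by key value rather than row position: s2 sums every row of the partition whose p_size lies in [current p_size - 10, current p_size], and s1 is the mirror image. Worked check for Manufacturer#1 at p_size 6: the qualifying sizes are 2, 2 and 6, so s2 = 10, exactly as printed above.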
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s
+Manufacturer#1 almond antique burnished rose metallic 2 38
+Manufacturer#1 almond antique burnished rose metallic 2 44
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 112
+Manufacturer#1 almond aquamarine burnished black steel 28 110
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 76
+Manufacturer#2 almond antique violet chocolate turquoise 14 56
+Manufacturer#2 almond antique violet turquoise frosted 40 81
+Manufacturer#2 almond aquamarine midnight light salmon 2 99
+Manufacturer#2 almond aquamarine rose maroon antique 25 85
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45
+Manufacturer#3 almond antique chartreuse khaki white 17 50
+Manufacturer#3 almond antique forest lavender goldenrod 14 51
+Manufacturer#3 almond antique metallic orange dim 19 96
+Manufacturer#3 almond antique misty red olive 1 79
+Manufacturer#3 almond antique olive coral navajo 45 65
+Manufacturer#4 almond antique gainsboro frosted violet 10 76
+Manufacturer#4 almond antique violet mint lemon 39 83
+Manufacturer#4 almond aquamarine floral ivory bisque 27 95
+Manufacturer#4 almond aquamarine yellow dodger mint 7 85
+Manufacturer#4 almond azure aquamarine papaya violet 12 46
+Manufacturer#5 almond antique blue firebrick mint 31 39
+Manufacturer#5 almond antique medium spring khaki 6 85
+Manufacturer#5 almond antique sky peru orange 2 108
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 77
+Manufacturer#5 almond azure blanched chiffon midnight 23 71
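By contrast, ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING is positional and is clipped at partition edges: the first Manufacturer#1 row sees only itself plus the next two rows (2 + 2 + 34 = 38), while the third row sees the full five-row window (2 + 2 + 34 + 6 + 28 = 72), matching the s column above.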
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s
+Manufacturer#1 almond antique burnished rose metallic 2 38
+Manufacturer#1 almond antique burnished rose metallic 2 44
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 112
+Manufacturer#1 almond aquamarine burnished black steel 28 110
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 76
+Manufacturer#2 almond antique violet chocolate turquoise 14 56
+Manufacturer#2 almond antique violet turquoise frosted 40 81
+Manufacturer#2 almond aquamarine midnight light salmon 2 99
+Manufacturer#2 almond aquamarine rose maroon antique 25 85
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45
+Manufacturer#3 almond antique chartreuse khaki white 17 50
+Manufacturer#3 almond antique forest lavender goldenrod 14 51
+Manufacturer#3 almond antique metallic orange dim 19 96
+Manufacturer#3 almond antique misty red olive 1 79
+Manufacturer#3 almond antique olive coral navajo 45 65
+Manufacturer#4 almond antique gainsboro frosted violet 10 76
+Manufacturer#4 almond antique violet mint lemon 39 83
+Manufacturer#4 almond aquamarine floral ivory bisque 27 95
+Manufacturer#4 almond aquamarine yellow dodger mint 7 85
+Manufacturer#4 almond azure aquamarine papaya violet 12 46
+Manufacturer#5 almond antique blue firebrick mint 31 39
+Manufacturer#5 almond antique medium spring khaki 6 85
+Manufacturer#5 almond antique sky peru orange 2 108
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 77
+Manufacturer#5 almond azure blanched chiffon midnight 23 71
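The WINDOW clause merely names a specification, so this plan and result set are identical to the inlined version of the same query above. One named spec can also be shared by several functions; an illustrative sketch, not part of the test:

    select p_mfgr, p_name,
           sum(p_size) over w1 as s,
           min(p_size) over w1 as mn
    from part
    window w1 as (partition by p_mfgr order by p_name
                  rows between 2 preceding and 2 following);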
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s,
+sum(p_size) over w2 as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following),
+ w2 as (partition by p_mfgr order by p_name)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s,
+sum(p_size) over w2 as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following),
+ w2 as (partition by p_mfgr order by p_name)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+                            window function definition
+                              alias: sum_window_1
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: RANGE PRECEDING(MAX)~CURRENT
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s,
+sum(p_size) over w2 as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following),
+ w2 as (partition by p_mfgr order by p_name)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s,
+sum(p_size) over w2 as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following),
+ w2 as (partition by p_mfgr order by p_name)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s s2
+Manufacturer#1 almond antique burnished rose metallic 2 38 4
+Manufacturer#1 almond antique burnished rose metallic 2 44 4
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72 38
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 44
+Manufacturer#1 almond aquamarine burnished black steel 28 110 72
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 114
+Manufacturer#2 almond antique violet chocolate turquoise 14 56 14
+Manufacturer#2 almond antique violet turquoise frosted 40 81 54
+Manufacturer#2 almond aquamarine midnight light salmon 2 99 56
+Manufacturer#2 almond aquamarine rose maroon antique 25 85 81
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 99
+Manufacturer#3 almond antique chartreuse khaki white 17 50 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 51 31
+Manufacturer#3 almond antique metallic orange dim 19 96 50
+Manufacturer#3 almond antique misty red olive 1 79 51
+Manufacturer#3 almond antique olive coral navajo 45 65 96
+Manufacturer#4 almond antique gainsboro frosted violet 10 76 10
+Manufacturer#4 almond antique violet mint lemon 39 83 49
+Manufacturer#4 almond aquamarine floral ivory bisque 27 95 76
+Manufacturer#4 almond aquamarine yellow dodger mint 7 85 83
+Manufacturer#4 almond azure aquamarine papaya violet 12 46 95
+Manufacturer#5 almond antique blue firebrick mint 31 39 31
+Manufacturer#5 almond antique medium spring khaki 6 85 37
+Manufacturer#5 almond antique sky peru orange 2 108 39
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85
+Manufacturer#5 almond azure blanched chiffon midnight 23 71 108
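w2 carries no explicit frame, so Hive applies the default RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, recorded in the plan as RANGE PRECEDING(MAX)~CURRENT. Because a RANGE frame treats order-by ties as a single group, both duplicate 'almond antique burnished rose metallic' rows report s2 = 4 (2 + 2) rather than 2 and then 4.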
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as w1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as w1
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: RANGE PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as w1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as w1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s1 s2
+Manufacturer#1 almond antique burnished rose metallic 2 4 4
+Manufacturer#1 almond antique burnished rose metallic 2 4 4
+Manufacturer#1 almond antique chartreuse lavender yellow 34 34 34
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 6
+Manufacturer#1 almond aquamarine burnished black steel 28 28 28
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 42
+Manufacturer#2 almond antique violet chocolate turquoise 14 14 14
+Manufacturer#2 almond antique violet turquoise frosted 40 40 40
+Manufacturer#2 almond aquamarine midnight light salmon 2 2 2
+Manufacturer#2 almond aquamarine rose maroon antique 25 25 25
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 18
+Manufacturer#3 almond antique chartreuse khaki white 17 17 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 14 14
+Manufacturer#3 almond antique metallic orange dim 19 19 19
+Manufacturer#3 almond antique misty red olive 1 1 1
+Manufacturer#3 almond antique olive coral navajo 45 45 45
+Manufacturer#4 almond antique gainsboro frosted violet 10 10 10
+Manufacturer#4 almond antique violet mint lemon 39 39 39
+Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27
+Manufacturer#4 almond aquamarine yellow dodger mint 7 7 7
+Manufacturer#4 almond azure aquamarine papaya violet 12 12 12
+Manufacturer#5 almond antique blue firebrick mint 31 31 31
+Manufacturer#5 almond antique medium spring khaki 6 6 6
+Manufacturer#5 almond antique sky peru orange 2 2 2
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 46
+Manufacturer#5 almond azure blanched chiffon midnight 23 23 23
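Defining w2 as w1 makes both references resolve to one and the same specification, so the plan evaluates a single sum_window_0 and projects it into both output columns; that is why s1 and s2 agree on every row above.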
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as (w1 rows between unbounded preceding and current row)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as (w1 rows between unbounded preceding and current row)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    includeColumns: [1, 2, 5]
+                    dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported
+                vectorized: false
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: RANGE PRECEDING(2)~FOLLOWING(2)
+                            window function definition
+                              alias: sum_window_1
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: ROWS PRECEDING(MAX)~CURRENT
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as (w1 rows between unbounded preceding and current row)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over w2 as s2
+from part
+window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following),
+ w2 as (w1 rows between unbounded preceding and current row)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s1 s2
+Manufacturer#1 almond antique burnished rose metallic 2 4 2
+Manufacturer#1 almond antique burnished rose metallic 2 4 4
+Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44
+Manufacturer#1 almond aquamarine burnished black steel 28 28 72
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114
+Manufacturer#2 almond antique violet chocolate turquoise 14 14 14
+Manufacturer#2 almond antique violet turquoise frosted 40 40 54
+Manufacturer#2 almond aquamarine midnight light salmon 2 2 56
+Manufacturer#2 almond aquamarine rose maroon antique 25 25 81
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99
+Manufacturer#3 almond antique chartreuse khaki white 17 17 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 14 31
+Manufacturer#3 almond antique metallic orange dim 19 19 50
+Manufacturer#3 almond antique misty red olive 1 1 51
+Manufacturer#3 almond antique olive coral navajo 45 45 96
+Manufacturer#4 almond antique gainsboro frosted violet 10 10 10
+Manufacturer#4 almond antique violet mint lemon 39 39 49
+Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76
+Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83
+Manufacturer#4 almond azure aquamarine papaya violet 12 12 95
+Manufacturer#5 almond antique blue firebrick mint 31 31 31
+Manufacturer#5 almond antique medium spring khaki 6 6 37
+Manufacturer#5 almond antique sky peru orange 2 2 39
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85
+Manufacturer#5 almond azure blanched chiffon midnight 23 23 108
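A named window can also be refined: w2 as (w1 rows between unbounded preceding and current row) inherits w1's partitioning and ordering but overrides its frame. Switching from the default RANGE to ROWS changes tie handling, so the first duplicate row now gets the running sum s2 = 2 and the second s2 = 4, where the RANGE default gave both rows 4. The same refinement pattern in isolation (illustrative only):

    select p_mfgr, p_size,
           sum(p_size) over w2 as running
    from part
    window w1 as (partition by p_mfgr order by p_name),
           w2 as (w1 rows between unbounded preceding and current row);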
Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(2)~FOLLOWING(2) + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as 
s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 s3 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 4 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 4 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 44 +Manufacturer#1 almond aquamarine burnished black steel 28 28 72 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 114 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 99 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 31 +Manufacturer#3 almond antique metallic orange dim 19 19 50 50 +Manufacturer#3 almond antique misty red olive 1 1 51 51 +Manufacturer#3 almond antique olive coral navajo 45 45 96 96 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 10 +Manufacturer#4 almond antique violet mint lemon 39 39 49 49 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 76 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 83 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 95 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 31 +Manufacturer#5 almond antique medium spring khaki 6 6 37 37 +Manufacturer#5 almond antique sky peru orange 2 2 39 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 85 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 108 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
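This pair of tests exercises window-definition chaining: w2 is defined purely by reference (w2 as w3), so in the results above s2 and s3 match row for row. The explain that follows covers the variant where the select item reuses w3 but overrides its frame inline, (w3 rows between 2 preceding and 2 following), turning the inherited RANGE frame into a bounded ROWS frame so that s3 diverges from s2. A minimal sketch of the chaining pattern, assuming the same default@part table (the result aliases are illustrative):

    select p_mfgr, p_name, p_size,
           sum(p_size) over w2 as by_reference,    -- inherits w3 unchanged
           sum(p_size) over (w3 rows between 2 preceding and 2 following) as overridden    -- replaces w3's frame
    from part
    window w2 as w3,
           w3 as (partition by p_mfgr order by p_name
                  range between unbounded preceding and current row)
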
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(2)~FOLLOWING(2) + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_2 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint), sum_window_2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 
26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 s3 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 38 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 112 +Manufacturer#1 almond aquamarine burnished black steel 28 28 72 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 45 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 51 +Manufacturer#3 almond antique metallic orange dim 19 19 50 96 +Manufacturer#3 almond antique misty red olive 1 1 51 79 +Manufacturer#3 almond antique olive coral navajo 45 45 96 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 76 +Manufacturer#4 almond antique violet mint lemon 39 39 49 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 46 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 39 +Manufacturer#5 almond antique medium spring khaki 6 6 37 85 +Manufacturer#5 almond antique sky peru orange 2 2 39 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 71 +PREHOOK: query: explain vectorization detail +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + 
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int, KEY._col3:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + projectedOutputColumns: [] + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s +Manufacturer#1 almond antique burnished rose metallic 2 38 +Manufacturer#1 almond antique burnished rose metallic 2 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 +Manufacturer#4 almond antique violet mint lemon 39 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 
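Two details of the DISTINCT test are worth noting. First, DISTINCT applies to the windowed output, not the input: the two physically duplicate "almond antique burnished rose metallic" rows both survive because their rolling sums differ (38 vs 44). Second, the plan realizes DISTINCT as a separate stage: Reducer 2 runs the PTF (left unvectorized here, since the bounded-start ROWS frame trips the "sum only UNBOUNDED start frame is supported" check), and Reducer 3 then deduplicates with a vectorized hash/mergepartial Group By over all four projected columns. A minimal sketch of the equivalent explicit formulation, assuming the same default@part table:

    select p_mfgr, p_name, p_size, s
    from (select p_mfgr, p_name, p_size,
                 sum(p_size) over (partition by p_mfgr order by p_name
                                   rows between 2 preceding and 2 following) as s
          from part) windowed
    group by p_mfgr, p_name, p_size, s
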
+Manufacturer#4 almond aquamarine yellow dodger mint 7 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#5 almond antique blue firebrick mint 31 39 +Manufacturer#5 almond antique medium spring khaki 6 85 +Manufacturer#5 almond antique sky peru orange 2 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input 
definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 0, 2] + outputTypes: [int, string, string, int] + partitionExpressions: [col 0] + streamingColumns: [3] + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 
1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col6:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double, bigint, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col6 (type: double) 
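All five aggregates in this query resolve to the same window: partition by, distribute by, and cluster by p_mfgr are interchangeable spellings of the same partitioning (with sort by doubling for order by, and an omitted order by defaulting to the partition key). The plan below therefore folds everything into a single PTF Operator whose evaluatorClasses line carries one vectorized evaluator per function (sum, min, max, avg, count), all computed in one pass over each partition, which is why every row of a given manufacturer repeats identical s1..s5 values in the results. A minimal sketch of the shared-window pattern, assuming the same default@part table:

    select p_mfgr,
           round(sum(p_retailprice) over w, 2) as s1,
           min(p_retailprice) over w as s2,
           max(p_retailprice) over w as s3,
           round(avg(p_retailprice) over w, 2) as s4,
           count(p_retailprice) over w as s5
    from part
    window w as (partition by p_mfgr)
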
+ outputColumnNames: _col2, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 26 Data size: 9724 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col7 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_4 + arguments: _col7 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum, VectorPTFEvaluatorDoubleMin, VectorPTFEvaluatorDoubleMax, VectorPTFEvaluatorDoubleAvg, VectorPTFEvaluatorCount] + functionInputExpressions: [col 1, col 1, col 1, col 1, col 1] + functionNames: [sum, min, max, avg, count] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1] + orderExpressions: [col 0] + outputColumns: [2, 3, 4, 5, 6, 0, 1] + outputTypes: [double, double, double, double, bigint, string, double] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 9724 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), round(sum_window_0, 2) (type: double), min_window_1 (type: double), max_window_2 (type: double), round(avg_window_3, 2) (type: double), count_window_4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 7, 3, 4, 8, 6] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 2, decimalPlaces 2) -> 7:double, RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 8:double + Statistics: Num rows: 26 Data size: 3588 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 3588 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by 
p_mfgr ) as s5 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr s1 s2 s3 s4 s5 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: 
string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col2, _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: double), min_window_1 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + outputColumnNames: sum_window_0, min_window_1, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: sum_window_0 (type: double), min_window_1 (type: double), _col5 (type: int), _col7 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double, VALUE._col1:double, VALUE._col5:int, VALUE._col7:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), VALUE._col1 (type: double), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: int), VALUE._col7 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 0, 4, 5] + Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: double, _col1: double, _col3: string, _col4: string, _col7: int, _col9: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col4, _col3 + raw input shape: + window functions: + window function definition + alias: max_window_2 + arguments: _col9 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleMax] + functionInputExpressions: [col 5] + functionNames: [max] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3, 4, 5] + orderExpressions: [col 1] + outputColumns: [6, 2, 3, 1, 0, 4, 5] + outputTypes: [double, double, double, string, string, int, double] + partitionExpressions: [col 0, col 1] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 13390 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: string), _col3 (type: string), _col7 (type: int), round(_col0, 2) (type: double), _col1 (type: double), max_window_2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 4, 7, 3, 6] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 2, decimalPlaces 2) -> 7:double + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select 
p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 s3 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 1753.76 1753.76 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 1602.59 1602.59 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 1414.42 1414.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 1632.66 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 1800.7 1800.7 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 2031.98 2031.98 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 1698.66 1698.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 1701.6 1701.6 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1671.68 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 1190.27 1190.27 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 1410.39 1410.39 +Manufacturer#3 almond antique misty red olive 1 1922.98 1922.98 1922.98 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 1337.29 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1620.67 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 1206.26 1206.26 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 1844.92 1844.92 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 1290.35 1290.35 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 1611.66 1611.66 +Manufacturer#5 almond antique sky peru orange 2 1788.73 1788.73 1788.73 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 1018.1 1018.1 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 1464.48 1464.48 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5252 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + 
native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), substr(p_type, 2) (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: StringSubstrColStart(col 4, start 1) -> 9:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5252 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_type (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 4] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: string + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string, string, string + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col2, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + Statistics: Num rows: 26 Data size: 12220 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col4: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: substr(_col4, 2) ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: substr(_col4, 2) + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [StringSubstrColStart(col 2, start 1) -> 5:string] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [StringSubstrColStart(col 2, start 1) -> 4:string] + outputColumns: [3, 0, 2] + outputTypes: [int, string, string] + partitionExpressions: [col 0] + streamingColumns: [3] + Statistics: Num rows: 26 Data size: 12220 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col4 (type: string), substr(_col4, 2) (type: string), rank_window_0 (type: int) + outputColumnNames: 
_col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 6, 3] + selectExpressions: StringSubstrColStart(col 2, start 1) -> 6:string + Statistics: Num rows: 26 Data size: 10140 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 10140 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_type short_ptype r +Manufacturer#1 LARGE BRUSHED STEEL ARGE BRUSHED STEEL 1 +Manufacturer#1 LARGE BURNISHED STEEL ARGE BURNISHED STEEL 2 +Manufacturer#1 PROMO BURNISHED NICKEL ROMO BURNISHED NICKEL 3 +Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 +Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 +Manufacturer#1 STANDARD ANODIZED STEEL TANDARD ANODIZED STEEL 6 +Manufacturer#2 ECONOMY POLISHED STEEL CONOMY POLISHED STEEL 1 +Manufacturer#2 MEDIUM ANODIZED COPPER EDIUM ANODIZED COPPER 2 +Manufacturer#2 MEDIUM BURNISHED COPPER EDIUM BURNISHED COPPER 3 +Manufacturer#2 SMALL POLISHED NICKEL MALL POLISHED NICKEL 4 +Manufacturer#2 STANDARD PLATED TIN TANDARD PLATED TIN 5 +Manufacturer#3 ECONOMY PLATED COPPER CONOMY PLATED COPPER 1 +Manufacturer#3 MEDIUM BURNISHED BRASS EDIUM BURNISHED BRASS 2 +Manufacturer#3 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#3 PROMO ANODIZED TIN ROMO ANODIZED TIN 4 +Manufacturer#3 STANDARD POLISHED STEEL TANDARD POLISHED STEEL 5 +Manufacturer#4 ECONOMY BRUSHED COPPER CONOMY BRUSHED COPPER 1 +Manufacturer#4 PROMO POLISHED STEEL ROMO POLISHED STEEL 4 +Manufacturer#4 SMALL BRUSHED BRASS MALL BRUSHED BRASS 2 +Manufacturer#4 SMALL PLATED STEEL MALL PLATED STEEL 3 +Manufacturer#4 STANDARD ANODIZED TIN TANDARD ANODIZED TIN 5 +Manufacturer#5 ECONOMY BURNISHED STEEL CONOMY BURNISHED STEEL 2 +Manufacturer#5 LARGE BRUSHED BRASS ARGE BRUSHED BRASS 1 +Manufacturer#5 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#5 SMALL PLATED BRASS MALL PLATED BRASS 4 +Manufacturer#5 STANDARD BURNISHED TIN TANDARD BURNISHED TIN 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked 
pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, 
p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 5702.65 +Manufacturer#1 almond aquamarine burnished black steel 28 7117.07 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 5523.36 +Manufacturer#2 almond aquamarine rose maroon antique 25 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 4272.34 +Manufacturer#3 almond antique misty red olive 1 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 7337.62 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 3401.35 +Manufacturer#5 almond antique sky peru orange 2 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 7672.66 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col1:string, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0, 1, 3] + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum] + functionInputExpressions: [col 3] + functionNames: [sum] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 0, 1, 3] + outputTypes: [double, string, string, int, double] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 2) -> 5:double + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data 
size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.07 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 3948.89 +Manufacturer#1 almond aquamarine burnished black steel 28 5363.31 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 3722.66 +Manufacturer#2 almond antique violet turquoise frosted 40 8923.62 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 +Manufacturer#2 almond aquamarine rose maroon antique 25 7122.92 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5424.26 +Manufacturer#3 almond antique chartreuse khaki white 17 4784.93 +Manufacturer#3 almond antique forest lavender goldenrod 14 3113.25 +Manufacturer#3 almond antique metallic orange dim 19 6195.32 +Manufacturer#3 almond antique misty red olive 1 1922.98 +Manufacturer#3 almond antique olive coral navajo 45 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 3465.59 +Manufacturer#4 almond antique violet mint lemon 39 7337.62 +Manufacturer#4 almond aquamarine floral ivory bisque 27 5962.2 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 +Manufacturer#4 almond azure aquamarine papaya violet 12 4755.94 +Manufacturer#5 almond antique blue firebrick mint 31 6654.56 +Manufacturer#5 almond antique medium spring khaki 6 3400.39 +Manufacturer#5 almond antique sky peru orange 2 1788.73 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 7672.66 +Manufacturer#5 almond azure blanched chiffon midnight 23 4864.87 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + 
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS CURRENT~FOLLOWING(MAX) + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, 
p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 +Manufacturer#1 almond antique burnished rose metallic 2 7576.58 +Manufacturer#1 almond antique burnished rose metallic 2 8749.73 +Manufacturer#1 almond antique chartreuse lavender yellow 34 6403.43 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4649.67 +Manufacturer#1 almond aquamarine burnished black steel 28 3047.08 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 8923.62 +Manufacturer#2 almond antique violet turquoise frosted 40 7232.94 +Manufacturer#2 almond aquamarine midnight light salmon 2 5432.24 +Manufacturer#2 almond aquamarine rose maroon antique 25 3400.26 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 +Manufacturer#3 almond antique chartreuse khaki white 17 7532.61 +Manufacturer#3 almond antique forest lavender goldenrod 14 5860.93 +Manufacturer#3 almond antique metallic orange dim 19 4670.66 +Manufacturer#3 almond antique misty red olive 1 3260.27 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet 10 7337.62 +Manufacturer#4 almond antique violet mint lemon 39 5716.95 +Manufacturer#4 almond aquamarine floral ivory bisque 27 4341.53 +Manufacturer#4 almond aquamarine yellow dodger mint 7 3135.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 +Manufacturer#5 almond antique blue firebrick mint 31 7672.66 +Manufacturer#5 almond antique medium spring khaki 6 5882.97 +Manufacturer#5 almond antique sky peru orange 2 4271.31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 2482.58 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE CURRENT~FOLLOWING(MAX) + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 +Manufacturer#1 almond antique burnished rose metallic 2 8749.73 +Manufacturer#1 almond antique burnished rose metallic 2 8749.73 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3386.42 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6403.43 +Manufacturer#1 almond aquamarine burnished black 
steel 28 4800.84 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 6891.64 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 +Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 +Manufacturer#2 almond aquamarine rose maroon antique 25 3499.36 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5200.96 +Manufacturer#3 almond antique chartreuse khaki white 17 4419.36 +Manufacturer#3 almond antique forest lavender goldenrod 14 5609.63 +Manufacturer#3 almond antique metallic orange dim 19 2747.68 +Manufacturer#3 almond antique misty red olive 1 7532.61 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet 10 5492.7 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque 27 2581.68 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7337.62 +Manufacturer#4 almond azure aquamarine papaya violet 12 3872.03 +Manufacturer#5 almond antique blue firebrick mint 31 2807.79 +Manufacturer#5 almond antique medium spring khaki 6 5883.93 +Manufacturer#5 almond antique sky peru orange 2 7672.66 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 +Manufacturer#5 almond azure blanched chiffon midnight 23 4272.27 +PREHOOK: query: explain vectorization detail +select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: 0 (type: int) + sort order: + + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val 0) -> 9:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, 
p_retailprice:double, p_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col1:string, VALUE._col7:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, bigint, double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), VALUE._col7 (type: double) + outputColumnNames: _col1, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + Statistics: Num rows: 26 Data size: 10322 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [ConstantVectorExpression(val 0) -> 4:long] + outputColumns: [3, 1, 2] + outputTypes: [double, string, double] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 10322 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col7 (type: double), round(avg_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2) -> 5:double + Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:double, VALUE._col1:double + partitionColumnCount: 0 + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_name p_retailprice _c2 +almond antique blue firebrick mint 1789.69 1546.78 +almond antique burnished rose metallic 1173.15 1546.78 +almond antique burnished rose metallic 1173.15 1546.78 +almond antique chartreuse khaki white 1671.68 1546.78 +almond antique chartreuse lavender yellow 1753.76 1546.78 +almond antique forest lavender goldenrod 1190.27 1546.78 +almond antique gainsboro frosted violet 1620.67 1546.78 +almond antique medium spring khaki 1611.66 1546.78 +almond antique metallic orange dim 1410.39 1546.78 +almond antique misty red olive 1922.98 1546.78 +almond antique olive coral navajo 1337.29 1546.78 +almond antique salmon chartreuse burlywood 1602.59 1546.78 +almond antique sky peru orange 1788.73 1546.78 +almond antique violet chocolate turquoise 1690.68 1546.78 +almond antique violet mint lemon 1375.42 1546.78 +almond antique violet turquoise frosted 1800.7 1546.78 +almond aquamarine burnished black steel 1414.42 1546.78 +almond aquamarine dodger light gainsboro 1018.1 1546.78 +almond aquamarine floral ivory bisque 1206.26 1546.78 +almond aquamarine midnight light salmon 2031.98 1546.78 +almond aquamarine pink moccasin thistle 1632.66 1546.78 +almond aquamarine rose maroon antique 1698.66 1546.78 +almond aquamarine sandy cyan gainsboro 1701.6 1546.78 +almond aquamarine yellow dodger mint 1844.92 1546.78 +almond azure aquamarine papaya violet 1290.35 1546.78 +almond azure blanched chiffon midnight 1464.48 1546.78 +PREHOOK: query: explain vectorization detail +select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: part + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val Manufacturer#6) -> boolean + predicate: (p_mfgr = 'Manufacturer#6') (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'Manufacturer#6' (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: 'Manufacturer#6' (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val Manufacturer#6) -> 9:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, string + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int) + outputColumnNames: _col5 + Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: 'Manufacturer#6' + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'Manufacturer#6' (type: string), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
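Note on the frame semantics these golden outputs capture: the ROWS UNBOUNDED PRECEDING .. CURRENT ROW plans above stay in row-mode (notVectorizedReason: "PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type"), while the same running sum with a RANGE frame vectorizes through VectorPTFOperator with VectorPTFEvaluatorDoubleSum. The observable difference between the two result sets is peer handling: ROWS gives each row a strict prefix sum, whereas RANGE admits all rows tied on the order key into the frame together, which is why the duplicate "rose metallic" rows show 1173.15 then 2346.3 under ROWS but 2346.3 twice under RANGE. A minimal standalone sketch of the two semantics (plain Java, not Hive code; class name hypothetical) that reproduces the Manufacturer#1 s1 column of the RANGE result set above:

public class FrameSumSketch {
  public static void main(String[] args) {
    // Manufacturer#1 rows from the outputs above, sorted by p_size
    // (the RANGE query's order key), with their p_retailprice values.
    int[] size = {2, 2, 6, 28, 34, 42};
    double[] price = {1173.15, 1173.15, 1602.59, 1414.42, 1753.76, 1632.66};

    // ROWS UNBOUNDED PRECEDING .. CURRENT ROW: strict prefix sum,
    // so the two tied size=2 rows get different values.
    double running = 0;
    for (int i = 0; i < price.length; i++) {
      running += price[i];
      System.out.printf("ROWS  size=%d sum=%.2f%n", size[i], running);
    }

    // RANGE UNBOUNDED PRECEDING .. CURRENT ROW: all peers (equal order
    // keys) enter the frame together, so tied rows share one sum.
    double total = 0;
    int i = 0;
    while (i < price.length) {
      int j = i;
      while (j < price.length && size[j] == size[i]) {
        total += price[j];
        j++;
      }
      for (int k = i; k < j; k++) {
        System.out.printf("RANGE size=%d sum=%.2f%n", size[k], total);
      }
      i = j;
    }
  }
}

Running this prints 2346.30, 2346.30, 3948.89, 5363.31, 7117.07, 8749.73 for the RANGE case, matching the Manufacturer#1 rows of the RANGE output earlier in this file; the ROWS case differs only on the tied rows (1173.15 then 2346.30), which is the distinction the non-vectorized ROWS plans above have to preserve. Note the actual ROWS query orders by p_name rather than p_size, so beyond the tied pair its running sums follow a different row order than this sketch.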
+PREHOOK: query: select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr sum_window_0 +PREHOOK: query: explain vectorization detail +select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1' +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val Manufacturer#1) -> boolean + predicate: (p_mfgr = 'Manufacturer#1') (type: boolean) + Statistics: Num rows: 5 Data size: 1135 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'Manufacturer#1' (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: 'Manufacturer#1' (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val Manufacturer#1) -> 9:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 1135 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + 
scratchColumnTypeNames: string, string + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: avg only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), VALUE._col6 (type: double) + outputColumnNames: _col1, _col7 + Statistics: Num rows: 5 Data size: 1985 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: 'Manufacturer#1' + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS CURRENT~FOLLOWING(6) + window function definition + alias: sum_window_1 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS CURRENT~FOLLOWING(6) + Statistics: Num rows: 5 Data size: 1985 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col7 (type: double), round(avg_window_0, 2) (type: double), round(sum_window_1, 2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1' +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_retailprice _c1 _c2 +1173.15 1458.29 8749.73 +1173.15 1515.32 7576.58 +1414.42 1523.54 3047.08 +1602.59 1549.89 4649.67 +1632.66 1632.66 1632.66 +1753.76 1600.86 6403.43 +PREHOOK: query: explain vectorization detail +select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1' +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + 
Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val m1) -> boolean + predicate: (p_mfgr = 'm1') (type: boolean) + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 'm1' (type: string) + sort order: + + Map-reduce partition columns: 'm1' (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val m1) -> 9:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, string + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col5:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string + Reduce Operator Tree: + Select Operator + expressions: VALUE._col5 (type: int) + outputColumnNames: _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 'm1' ASC NULLS FIRST + partition by: 'm1' + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 1] + functionNames: [sum] + keyInputColumns: [] + native: true + nonKeyInputColumns: [1] + orderExpressions: [ConstantVectorExpression(val m1) -> 3:string] + outputColumns: [2, 1] + outputTypes: [bigint, 
int] + streamingColumns: [] + Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sum_window_0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1' +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +sum_window_0 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out new file mode 100644 index 0000000..6d77235 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -0,0 +1,1993 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as 
(distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_retailprice (type: double) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_1 + arguments: lag(...) 
+ name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_2 + arguments: _col7 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_3 + arguments: _col7 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_4 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: first_value_window_5 + arguments: _col7 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Lead/Lag information: lag(...) (type: double) + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), (round(sum_window_0, 2) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean), ((max_window_3 - min_window_4) = (last_value_window_2 - first_value_window_5)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 _c4 +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1173.15 2 true true +Manufacturer#1 1414.42 28 true true +Manufacturer#1 1602.59 6 true true +Manufacturer#1 1632.66 42 true true +Manufacturer#1 1753.76 34 true true +Manufacturer#2 1690.68 14 true true +Manufacturer#2 1698.66 25 true true +Manufacturer#2 1701.6 18 true true +Manufacturer#2 1800.7 40 true true +Manufacturer#2 2031.98 2 true true +Manufacturer#3 1190.27 14 true true +Manufacturer#3 1337.29 45 true true +Manufacturer#3 1410.39 19 true true +Manufacturer#3 1671.68 17 true true +Manufacturer#3 1922.98 1 true true +Manufacturer#4 1206.26 27 true true +Manufacturer#4 1290.35 12 true true +Manufacturer#4 1375.42 39 true true +Manufacturer#4 1620.67 10 true true +Manufacturer#4 1844.92 7 true true 
+Manufacturer#5 1018.1 46 true true +Manufacturer#5 1464.48 23 true true +Manufacturer#5 1611.66 6 true true +Manufacturer#5 1788.73 2 true true +Manufacturer#5 1789.69 31 true true +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_retailprice (type: double) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: 
ptf_0 + output shape: _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col7 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_1 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size r s2 s1 +Manufacturer#1 1173.15 2 1 1173.15 1168.15 +Manufacturer#1 1173.15 2 1 2346.3 2341.3 +Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003 +Manufacturer#1 1602.59 6 4 5363.31 5358.31 +Manufacturer#1 1632.66 42 5 6995.97 6990.97 +Manufacturer#1 1753.76 34 6 8749.73 8744.73 +Manufacturer#2 1690.68 14 1 1690.68 1685.68 +Manufacturer#2 1698.66 25 2 3389.34 3384.34 +Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005 +Manufacturer#2 1800.7 40 4 6891.64 6886.64 +Manufacturer#2 2031.98 2 5 8923.62 8918.62 +Manufacturer#3 1190.27 14 1 1190.27 1185.27 +Manufacturer#3 1337.29 45 2 2527.56 2522.56 +Manufacturer#3 1410.39 19 3 3937.95 3932.95 +Manufacturer#3 1671.68 17 4 5609.63 5604.63 +Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001 +Manufacturer#4 1206.26 27 1 1206.26 1201.26 +Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997 +Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997 +Manufacturer#4 1620.67 10 4 5492.7 5487.7 +Manufacturer#4 1844.92 7 5 7337.62 7332.62 +Manufacturer#5 1018.1 46 1 1018.1 1013.1 +Manufacturer#5 
1464.48 23 2 2482.58 2477.58 +Manufacturer#5 1611.66 6 3 4094.24 4089.24 +Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999 +Manufacturer#5 1789.69 31 5 7672.66 7667.66 +PREHOOK: query: explain vectorization detail +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: t (type: tinyint), bo (type: boolean), s (type: string), si (type: smallint), f (type: float) + sort order: ++++- + Map-reduce partition columns: t (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 1, 4, 6, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col4, _col6, _col7 + Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col4: float, _col6: boolean, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col6 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 DESC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function 
definition + alias: lead_window_0 + arguments: _col4, 3 + name: lead + window function: GenericUDAFLeadEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col4 (type: float), (UDFToFloat(_col1) - lead_window_0) (type: float) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 11600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 11600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si f _c3 +alice allen 400 76.31 337.23 +alice davidson 384 71.97 357.79 +alice king 455 2.48 395.93 +alice king 458 62.77 384.16998 +alice xylophone 485 26.21 464.05 +bob falkner 260 59.07 242.4 +bob ichabod 454 73.83 381.7 +bob polk 264 20.95 257.17 +bob underhill 454 17.6 424.94 +bob underhill 465 72.3 453.17 +bob van buren 433 6.83 398.4 +calvin ichabod 431 29.06 334.22 +david garcia 485 11.83 421.51 +ethan steinbeck 298 34.6 288.14 +fred ellison 376 96.78 330.76 +holly steinbeck 384 63.49 293.7 +holly underhill 318 9.86 269.91 +irene ellison 458 45.24 365.29 +irene underhill 307 90.3 244.19 +jessica johnson 494 48.09 490.18 +jessica king 459 92.71 452.2 +jessica white 284 62.81 209.08 +luke garcia 311 3.82 267.27 +luke young 451 6.8 429.0 +mike king 275 74.92 211.81 +oscar garcia 362 43.73 340.66 +priscilla laertes 316 22.0 296.06 +priscilla quirinius 423 63.19 362.72 +priscilla zipper 485 21.34 400.61 +quinn ellison 266 19.94 209.95 +quinn polk 507 60.28 447.66 +sarah robinson 320 84.39 309.74 +tom polk 346 56.05 320.33 +ulysses ellison 381 59.34 358.66 +ulysses quirinius 303 10.26 259.6 +ulysses robinson 313 25.67 269.31 +ulysses steinbeck 333 22.34 270.61 +victor allen 337 43.4 311.5 +victor hernandez 447 43.69 375.22 +victor xylophone 438 62.39 424.33 +wendy quirinius 279 25.5 250.25 +wendy robinson 275 71.78 262.88 +wendy xylophone 314 13.67 295.73 +xavier garcia 493 28.75 474.56 +zach thompson 386 12.12 377.63 +zach young 286 18.27 263.65 +alice falkner 280 18.44 227.7 +bob ellison 339 8.37 300.95 +bob johnson 374 22.35 326.49 +calvin white 280 52.3 198.32 +david carson 270 38.05 255.77 +david falkner 469 47.51 388.35 +david hernandez 408 81.68 339.27 +ethan underhill 339 14.23 256.26 +gabriella brown 498 80.65 413.25 +holly nixon 505 68.73 440.71 +holly polk 268 82.74 182.04001 +holly thompson 387 84.75 298.22 +irene young 458 64.29 401.8 +jessica miller 299 85.96 243.41 +katie ichabod 469 88.78 385.61 +luke ichabod 289 56.2 286.74 +luke king 337 55.59 274.88 +mike allen 465 83.39 
383.03 +mike polk 500 2.26 427.74 +mike white 454 62.12 430.78 +mike xylophone 448 81.97 447.17 +nick nixon 335 72.26 240.78 +nick robinson 350 23.22 294.59 +oscar davidson 432 0.83 420.93 +oscar johnson 315 94.22 233.05 +oscar johnson 469 55.41 468.44 +oscar miller 324 11.07 265.19 +rachel davidson 507 81.95 468.78 +rachel thompson 344 0.56 246.12 +sarah miller 386 58.81 304.36 +sarah xylophone 275 38.22 177.48999 +sarah zipper 376 97.88 294.61 +tom hernandez 467 81.64 459.9 +tom hernandez 477 97.51 415.19 +tom steinbeck 414 81.39 361.87 +ulysses carson 343 7.1 314.22 +victor robinson 415 61.81 349.5 +victor thompson 344 52.13 NULL +xavier ovid 280 28.78 NULL +yuri xylophone 430 65.5 NULL +alice underhill 389 26.68 368.06 +alice underhill 446 6.49 444.21 +bob ovid 331 67.12 236.43 +bob van buren 406 20.94 383.32 +david falkner 406 1.79 374.34 +david miller 450 94.57 380.13 +ethan allen 380 22.68 375.6 +ethan king 395 31.66 361.51 +ethan nixon 475 69.87 431.39 +ethan polk 283 4.4 243.82 +fred allen 331 33.49 281.68 +fred king 511 43.61 457.22 +fred polk 261 39.18 248.73 +fred young 303 49.32 221.51001 +PREHOOK: query: explain vectorization detail +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: si (type: smallint), i (type: int), s (type: string) + sort order: +++ + Map-reduce partition columns: si (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, 
row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col1, _col2, _col7 + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col7 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: lead_window_0 + arguments: _col2, 3, 0 + name: lead + window function: GenericUDAFLeadEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int), (_col2 - lead_window_0) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +wendy garcia 65540 -18 +ethan thompson 65543 -20 +zach nixon 65549 -31 +alice robinson 65558 -28 +wendy nixon 65563 -33 +victor robinson 65580 -19 +ethan falkner 65586 -18 +victor davidson 65596 -17 +xavier quirinius 65599 -14 +fred quirinius 65604 -11 +nick zipper 65613 -3 +xavier van buren 65613 -7 +victor johnson 65615 -12 +alice ovid 65616 -24 +xavier ovid 65620 -23 +ulysses white 65627 -24 +sarah white 65640 -13 +calvin young 65643 -25 +victor thompson 65651 -42 +calvin johnson 65653 -53 +irene polk 65668 -45 +zach underhill 65693 -38 +quinn hernandez 65706 -27 +rachel ovid 65713 -24 +gabriella falkner 65731 -7 +zach white 65733 -8 +fred hernandez 65737 -7 +rachel ellison 65738 -6 +oscar steinbeck 65741 -6 +alice ellison 65744 -8 +tom allen 65744 -19 +quinn quirinius 65747 -31 +victor hernandez 65752 -26 +holly xylophone 65763 -26 +david davidson 65778 65778 +ulysses young 65778 65778 +sarah brown 65789 65789 +xavier brown 65541 -16 +zach hernandez 65542 -18 +katie ichabod 65547 -19 +oscar young 65557 -15 +holly white 65560 -14 +priscilla laertes 65566 -9 +ethan king 65572 -6 +zach hernandez 65574 -10 +oscar thompson 65575 -13 +victor xylophone 65578 -16 +gabriella ellison 65584 -26 +nick quirinius 65588 -22 +holly robinson 65594 -18 +alice xylophone 65610 -16 +yuri brown 65610 -21 +sarah hernandez 65612 -26 +katie garcia 65626 -28 +jessica laertes 65631 -23 
+ethan underhill 65638 -17 +irene young 65654 -37 +priscilla thompson 65654 -40 +luke quirinius 65655 -44 +david brown 65691 -20 +luke falkner 65694 -18 +priscilla miller 65699 -20 +rachel robinson 65711 -9 +ethan polk 65712 -10 +wendy brown 65719 -13 +mike underhill 65720 -18 +zach underhill 65722 -26 +nick zipper 65732 -20 +fred brown 65738 -18 +ulysses young 65748 -23 +nick davidson 65752 -19 +fred zipper 65756 -15 +yuri nixon 65771 -10 +zach hernandez 65771 -19 +zach zipper 65771 65771 +alice underhill 65781 65781 +oscar laertes 65790 65790 +sarah zipper 65546 -19 +bob falkner 65551 -17 +luke ovid 65551 -17 +katie allen 65565 -4 +nick falkner 65568 -5 +zach steinbeck 65568 -11 +oscar van buren 65569 -13 +gabriella young 65573 -11 +jessica ichabod 65579 -24 +david garcia 65582 -24 +nick xylophone 65584 -27 +calvin johnson 65603 -14 +xavier zipper 65606 -50 +alice nixon 65611 -58 +jessica laertes 65617 -62 +fred king 65656 -61 +priscilla underhill 65669 -48 +priscilla zipper 65679 -45 +nick king 65717 -11 +sarah polk 65717 -17 +irene quirinius 65724 -28 +tom laertes 65728 -25 +yuri johnson 65734 -27 +PREHOOK: query: explain vectorization detail +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: b (type: bigint), si (type: smallint), s (type: string), d (type: double) + sort order: ++++ + Map-reduce partition columns: b (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 3, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, 
last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col1, _col3, _col5, _col7 + Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col3: bigint, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: lag_window_0 + arguments: _col5, 3 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col5 (type: double), (UDFToDouble(_col1) - lag_window_0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 12000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 12000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si d _c3 +jessica ellison 262 30.41 NULL +david young 266 45.12 NULL +jessica steinbeck 274 2.15 NULL +david zipper 275 43.45 244.59 +zach nixon 283 15.95 237.88 +holly allen 285 24.37 282.85 +irene garcia 292 33.54 248.55 +ulysses xylophone 292 44.66 276.05 +irene van buren 309 35.81 284.63 +sarah miller 312 6.65 278.46 +victor garcia 312 39.14 267.34000000000003 +ethan ichabod 319 29.4 283.19 +wendy falkner 322 10.02 315.35 +oscar miller 324 25.95 284.86 +david ovid 332 28.34 302.6 +alice zipper 333 3.38 322.98 +yuri nixon 333 8.28 307.05 +ulysses nixon 335 18.48 306.66 +david ovid 336 9.36 332.62 +calvin falkner 337 17.63 328.72 +katie quirinius 349 11.3 330.52 +quinn miller 351 22.46 341.64 +victor xylophone 357 38.58 339.37 +ethan garcia 368 9.2 356.7 +nick steinbeck 395 37.54 372.54 +ulysses ichabod 415 47.61 376.42 +rachel thompson 416 37.99 406.8 +calvin young 418 47.22 380.46 +katie xylophone 425 32.59 377.39 +nick quirinius 429 19.63 391.01 +ethan ellison 453 47.92 405.78 +irene nixon 454 48.03 421.40999999999997 +bob steinbeck 462 47.04 442.37 +luke robinson 462 47.48 414.08 +gabriella steinbeck 467 9.35 418.97 +tom hernandez 467 29.36 419.96 +irene polk 485 14.26 437.52 +mike xylophone 494 36.92 484.65 
+calvin allen 499 39.99 469.64 +quinn steinbeck 503 16.62 488.74 +calvin thompson 263 30.87 NULL +rachel quirinius 263 29.46 NULL +ulysses garcia 263 31.85 NULL +mike steinbeck 266 48.57 235.13 +rachel young 275 14.75 245.54 +tom king 278 31.11 246.15 +oscar robinson 283 30.35 234.43 +zach allen 284 1.88 269.25 +bob king 308 27.61 276.89 +ulysses allen 310 22.77 279.65 +fred nixon 317 0.48 315.12 +gabriella robinson 321 0.33 293.39 +bob johnson 325 9.61 302.23 +rachel davidson 335 2.34 334.52 +fred brown 337 5.8 336.67 +wendy ellison 350 20.25 340.39 +zach falkner 391 13.67 388.66 +katie xylophone 410 39.09 404.2 +holly king 413 3.56 392.75 +sarah van buren 417 7.81 403.33 +calvin van buren 430 36.01 390.90999999999997 +katie white 434 33.56 430.44 +oscar quirinius 454 7.03 446.19 +zach young 505 18.19 468.99 +gabriella robinson 506 12.8 472.44 +sarah xylophone 507 16.09 499.97 +rachel thompson 267 46.87 NULL +gabriella van buren 271 41.04 NULL +mike steinbeck 284 11.44 NULL +ethan ovid 293 2.08 246.13 +luke falkner 293 40.67 251.96 +irene nixon 321 24.35 309.56 +mike van buren 327 2.58 324.92 +ulysses robinson 329 26.64 288.33 +quinn laertes 332 10.71 307.65 +tom polk 346 34.03 343.42 +jessica johnson 352 45.71 325.36 +xavier davidson 354 33.9 343.29 +wendy nixon 364 29.42 329.97 +jessica quirinius 375 47.33 329.29 +xavier brown 376 26.17 342.1 +gabriella davidson 383 18.87 353.58 +jessica brown 388 34.09 340.67 +gabriella garcia 391 32.44 364.83 +ethan miller 396 49.07 377.13 +bob garcia 416 7.82 381.90999999999997 +priscilla hernandez 416 29.94 383.56 +holly nixon 419 17.81 369.93 +nick underhill 429 39.54 421.18 +xavier falkner 434 0.88 404.06 +luke robinson 461 44.02 443.19 +bob underhill 465 22.58 425.46 +ulysses king 483 37.98 482.12 +jessica miller 486 26.14 441.98 +bob ovid 493 9.7 470.42 +alice falkner 500 37.85 462.02 +quinn xylophone 267 49.8 NULL +gabriella thompson 268 17.15 NULL +calvin xylophone 275 49.32 NULL +gabriella zipper 279 30.41 229.2 +PREHOOK: query: explain vectorization detail +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: f (type: float), b (type: bigint) + sort order: ++ + Map-reduce partition columns: f (type: float) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: 
vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3, 4, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string) + outputColumnNames: _col3, _col4, _col7 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col3: bigint, _col4: float, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col4 + raw input shape: + window functions: + window function definition + alias: lag_window_0 + arguments: _col7, 3, 'fred' + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), lag_window_0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s lag_window_0 +yuri thompson fred +bob ichabod fred +luke king fred +luke steinbeck fred +fred zipper fred +quinn miller fred +calvin van buren fred +holly steinbeck fred +david davidson fred +calvin thompson fred +calvin quirinius fred +david ovid fred +holly thompson fred +nick zipper fred +victor steinbeck fred +victor robinson fred +zach ovid fred +ulysses zipper fred +luke falkner fred +irene thompson fred +yuri johnson fred +ulysses falkner fred +gabriella robinson fred +alice robinson fred +priscilla xylophone fred +david laertes fred +mike underhill fred +victor van buren fred +holly 
falkner fred +priscilla falkner fred +ethan ovid fred +luke zipper fred +mike steinbeck fred +calvin white fred +alice quirinius fred +irene miller fred +wendy polk fred +nick young fred +yuri davidson fred +ethan ellison fred +zach hernandez fred +wendy miller fred +katie underhill fred +irene zipper fred +holly allen fred +quinn brown fred +calvin ovid fred +zach robinson fred +nick miller fred +mike allen fred +yuri van buren fred +priscilla young fred +zach miller fred +victor xylophone fred +sarah falkner fred +rachel ichabod fred +alice robinson fred +calvin ovid fred +calvin ovid fred +luke laertes fred +david hernandez fred +alice ovid fred +luke quirinius fred +oscar white fred +zach falkner fred +rachel thompson fred +priscilla king fred +xavier polk fred +wendy ichabod fred +rachel ovid fred +wendy allen fred +luke brown fred +mike brown fred +oscar ichabod fred +xavier garcia fred +yuri brown fred +bob xylophone fred +luke davidson fred +ethan quirinius fred +zach davidson fred +irene miller fred +wendy king fred +bob zipper fred +sarah thompson fred +bob carson fred +bob laertes fred +xavier allen fred +sarah robinson fred +david king fred +oscar davidson fred +victor hernandez fred +wendy polk fred +david ellison fred +ulysses johnson fred +jessica ovid fred +bob king fred +ulysses garcia fred +irene falkner fred +holly robinson fred +yuri white fred +PREHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_type (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), p_type (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 4, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + 
partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) + outputColumnNames: _col2, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col4: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2, _col4 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 0] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, string, string, double] + partitionExpressions: [col 0, col 1] + streamingColumns: [] + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), avg_window_0 (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3] + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#2 1800.7 +Manufacturer#4 1375.42 +Manufacturer#4 1620.67 +Manufacturer#4 1206.26 +Manufacturer#5 1788.73 +Manufacturer#1 1632.66 +Manufacturer#2 1690.68 +Manufacturer#2 1698.66 +Manufacturer#2 1701.6 +Manufacturer#3 1337.29 +Manufacturer#4 1844.92 +Manufacturer#4 1290.35 +Manufacturer#5 1018.1 +Manufacturer#5 1789.69 +Manufacturer#1 
1753.76 +Manufacturer#1 1602.59 +Manufacturer#1 1173.15 +Manufacturer#1 1173.15 +Manufacturer#1 1414.42 +Manufacturer#2 2031.98 +Manufacturer#3 1922.98 +Manufacturer#3 1410.39 +Manufacturer#3 1190.27 +Manufacturer#5 1464.48 +Manufacturer#5 1611.66 +Manufacturer#3 1671.68 +PREHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_type (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 4, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: avg UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) + outputColumnNames: _col2, _col4, _col7 + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col4: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col7 
+ name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), avg_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr avg_window_0 +Manufacturer#1 1753.76 +Manufacturer#1 1693.21 +Manufacturer#1 1663.0033333333333 +Manufacturer#1 1540.54 +Manufacturer#1 1467.062 +Manufacturer#1 1458.2883333333332 +Manufacturer#2 1800.7 +Manufacturer#2 1745.69 +Manufacturer#2 1841.1200000000001 +Manufacturer#2 1805.505 +Manufacturer#2 1784.7240000000002 +Manufacturer#3 1922.98 +Manufacturer#3 1666.685 +Manufacturer#3 1668.3500000000001 +Manufacturer#3 1548.83 +Manufacturer#3 1506.522 +Manufacturer#4 1844.92 +Manufacturer#4 1610.17 +Manufacturer#4 1613.67 +Manufacturer#4 1511.8175 +Manufacturer#4 1467.5240000000001 +Manufacturer#5 1018.1 +Manufacturer#5 1241.29 +Manufacturer#5 1424.0900000000001 +Manufacturer#5 1515.25 +Manufacturer#5 1534.532 +PREHOOK: query: create table t1 (a1 int, b1 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1 (a1 int, b1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2 (a1 int, b1 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2 (a1 int, b1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: explain vectorization detail +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern 
was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), i (type: int) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:int, VALUE._col6:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col2, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 1] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [bigint, int, string, timestamp] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: bigint), _col7 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + 
native: true + projectedOutputColumns: [3, 2] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +PREHOOK: Output: default@t1 +PREHOOK: Output: default@t2 +POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, 
type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] +POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ] +_col0 _col1 +PREHOOK: query: select * from t1 limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +t1.a1 t1.b1 +65542 rachel thompson +131088 oscar brown +262258 wendy steinbeck +PREHOOK: query: select * from t2 limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +t2.a1 t2.b1 +65542 rachel thompson +131088 oscar brown +262258 wendy steinbeck +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_retailprice (type: double) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 
26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: p_size (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_1 + arguments: lag(...) + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_2 + arguments: _col7 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Lead/Lag information: lag(...) 
(type: double) + Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), ((round(sum_window_0, 2) + 50.0) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 2964 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 11 + Statistics: Num rows: 11 Data size: 1254 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 1254 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 11 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_retailprice p_size _c3 +Manufacturer#1 1173.15 2 true +Manufacturer#1 1173.15 2 true +Manufacturer#1 1414.42 28 true +Manufacturer#1 1602.59 6 true +Manufacturer#1 1632.66 42 true +Manufacturer#1 1753.76 34 true +Manufacturer#2 1690.68 14 true +Manufacturer#2 1698.66 25 true +Manufacturer#2 1701.6 18 true +Manufacturer#2 1800.7 40 true +Manufacturer#2 2031.98 2 true diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out new file mode 100644 index 0000000..a9079c7 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out @@ -0,0 +1,303 @@ +PREHOOK: query: explain vectorization detail + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE + TableScan 
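
The Reducer 2 fallback in the plan above ("lead and lag function not supported in argument expression of aggregation function sum") is what keeps that whole reduce vertex in row mode: per the stated reason, the vectorized PTF path rejects lead/lag when it appears inside an aggregate's argument. A hedged two-step sketch of the same computation, hoisting lag() out of the sum() argument so the outer aggregate sees a plain column — note this is only an illustration of the whitelist rule, since lag() as a top-level window function is itself absent from the supported-function list, so the inner stage would presumably still run row-mode:

    -- hypothetical rewrite sketch, not part of this test suite
    select p_mfgr, p_retailprice,
           sum(lagged) over (partition by p_mfgr order by p_retailprice)
    from (select p_mfgr, p_retailprice,
                 lag(p_retailprice, 1, 50.0)
                   over (partition by p_mfgr order by p_retailprice) as lagged
          from part) t;
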
Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: value is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), c_int (type: int), c_boolean (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 4] + Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [1, 2, 4] + dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean + partitionColumnCount: 0 + Map 5 + Map Operator Tree: + TableScan + alias: wr + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 6) -> boolean + predicate: cstring1 is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cstring1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 6] + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 6] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3), sum(_col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col2 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:boolean, VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val 0) -> 3:long, DoubleColDivideDoubleColumn(col 4, col 5)(children: CastLongToDouble(col 1) -> 4:double, CastLongToDouble(col 2) -> 5:double) -> 6:double + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + 
enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:double, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, double, double, double, double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: bigint, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: (UDFToDouble(_col1) / UDFToDouble(_col2)) + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 9:double] + functionNames: [rank] + keyInputColumns: [] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 8:double] + outputColumns: [4, 2, 3] + outputTypes: [int, bigint, bigint] + partitionExpressions: [ConstantVectorExpression(val 0) -> 5:long] + streamingColumns: [4] + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by 
ws.c_boolean ) in_web +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +return_rank diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out new file mode 100644 index 0000000..14114be --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -0,0 +1,1114 @@ +PREHOOK: query: explain vectorization detail +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + Statistics: Num rows: 20 Data size: 1691 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(c_int) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 2] + dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + 
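
Both the gby plan above and the gby2 plan below share one planning detail worth noting: an OVER clause with ORDER BY but no PARTITION BY is compiled as "partition by: 0" (a ConstantVectorExpression key), so every grouped row is shuffled to a single reducer for the final ranking. That is harmless at these row counts, but where a per-group ranking would suffice, naming a partition column keeps the PTF parallel. A hedged sketch of the per-group variant against the same cbo_t3 columns (semantics intentionally differ from the global rank; shown only to contrast the shuffle pattern):

    -- hypothetical contrast query, not part of this test suite
    select key, c_int,
           rank() over (partition by key order by c_int) as r
    from cbo_t3;
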
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:string, VALUE._col0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val 0) -> 2:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 1] + outputTypes: [int, bigint] + partitionExpressions: [ConstantVectorExpression(val 0) -> 3:long] + streamingColumns: [2] + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] + Statistics: Num rows: 6 
Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +return_rank +1 +2 +2 +2 +5 +5 +7 +PREHOOK: query: explain vectorization detail +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 3306 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), c_int (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 1, 2] + selectExpressions: CastStringToLong(col 0) -> 5:int + Statistics: Num rows: 20 Data size: 3306 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0, 1] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: bigint) + Execution 
mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2] + dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:int, VALUE._col0:string, VALUE._col1:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:bigint, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0, 1] + Statistics: Num rows: 5 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col0 + name: avg + window function: 
GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 2, 0, 1] + outputTypes: [double, int, string, bigint] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 5 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: avg_window_0 (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +return_rank +NULL +1.0 +2.0 +3.0 +PREHOOK: query: explain vectorization detail +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: cbo_t3 + Statistics: Num rows: 20 Data size: 3382 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: key (type: string), value (type: string), (UDFToFloat(c_int) - c_float) (type: float), (UDFToDouble(c_float) / UDFToDouble(c_int)) (type: double), c_int (type: int), ((UDFToDouble(c_float) / UDFToDouble(c_int)) - UDFToDouble(c_int)) 
(type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 6, 7, 2, 9] + selectExpressions: DoubleColSubtractDoubleColumn(col 5, col 3)(children: CastLongToFloatViaLongToDouble(col 2) -> 5:double) -> 6:double, DoubleColDivideDoubleColumn(col 3, col 5)(children: col 3, CastLongToDouble(col 2) -> 5:double) -> 7:double, DoubleColSubtractDoubleColumn(col 8, col 5)(children: DoubleColDivideDoubleColumn(col 3, col 5)(children: col 3, CastLongToDouble(col 2) -> 5:double) -> 8:double, CastLongToDouble(col 2) -> 5:double) -> 9:double + Statistics: Num rows: 20 Data size: 3382 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 6) -> double, VectorUDAFSumDouble(col 7) -> double, VectorUDAFMaxLong(col 2) -> int, VectorUDAFSumDouble(col 9) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3] + dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double, double + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:double, VALUE._col1:double, VALUE._col2:int, VALUE._col3:double + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), sum(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 2) -> double, VectorUDAFSumDouble(col 3) -> double, VectorUDAFMaxLong(col 4) -> int, VectorUDAFSumDouble(col 5) -> double + className: 
VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1 + native: false + projectedOutputColumns: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: double) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col0:string, VALUE._col1:double, VALUE._col2:int, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string, string + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 1, 3, 4, 5] + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double, _col3: double, _col4: int, _col5: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 DESC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3, 4, 5] + orderExpressions: [col 1] + outputColumns: [6, 0, 2, 1, 3, 4, 5] + outputTypes: [int, string, string, double, double, int, double] + partitionExpressions: [col 0] + streamingColumns: [6] + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int), _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double) + outputColumnNames: rank_window_0, _col1, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [6, 2, 3, 4, 5] + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: lower(_col1) (type: string), _col3 (type: double) + sort order: ++ + Map-reduce partition columns: lower(_col1) (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: StringLower(col 2) -> 7:String + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col0:int, VALUE._col2:string, VALUE._col4:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, string + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col0, _col2, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 1, 4, 5] + Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: string, _col4: double, _col5: int, _col6: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: lower(_col2) + raw input shape: + window functions: + window function definition + alias: dense_rank_window_1 + arguments: _col4 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDenseRank] + functionInputExpressions: [col 1] + functionNames: [dense_rank] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [2, 3, 4, 5] + orderExpressions: [col 1] + outputColumns: [6, 2, 3, 1, 4, 5] + outputTypes: [int, int, string, double, int, double] + partitionExpressions: [StringLower(col 3) -> 7:String] + streamingColumns: [6] + Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: dense_rank_window_1 (type: int), _col0 (type: int), _col5 (type: int), _col6 (type: double) + outputColumnNames: dense_rank_window_1, _col0, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6, 2, 4, 5] + Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col5 (type: int), _col6 (type: double) + sort order: ++ + 
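
The Reducer 5 fragment just below shows the second fallback pattern in these results: percent_rank is not in the supported-function list [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]. Since rank and count are on that list, a hedged rewrite sketch of the percent_rank branch — using the definition percent_rank = (rank - 1) / (rows_in_partition - 1), which matches except in single-row partitions, where percent_rank() is defined as 0 but this expression divides by zero — would be:

    -- hypothetical rewrite sketch, not part of this test suite
    select (rank() over (partition by max(c_int)
                         order by sum((c_float/c_int) - c_int)) - 1)
           / (count(*) over (partition by max(c_int)) - 1.0)
    from cbo_t3
    group by key, value;

Whether the planner then vectorizes all three PTF stages is an assumption here, not something these results verify.
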
Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: dense_rank_window_1 (type: int), _col0 (type: int) + Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: percent_rank not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1, _col6, _col7 + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col6: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col6 + raw input shape: + window functions: + window function definition + alias: percent_rank_window_2 + arguments: _col7 + name: percent_rank + window function: GenericUDAFPercentRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: int), percent_rank_window_2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +_c0 _c1 _c2 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +PREHOOK: query: explain vectorization detail +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on 
ws.value = wr.cstring1 +group by ws.c_boolean +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 1767 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean + predicate: value is not null (type: boolean) + Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string), c_int (type: int), c_boolean (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 4] + Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: boolean) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [1, 2, 4] + dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean + partitionColumnCount: 0 + Map 5 + Map Operator Tree: + TableScan + alias: wr + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 6) -> boolean + predicate: cstring1 is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cstring1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 6] + Statistics: Num rows: 9174 Data size: 671296 
Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 6] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col3), sum(_col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col2 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:boolean, VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator 
+ key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val 0) -> 3:long, DoubleColDivideDoubleColumn(col 4, col 5)(children: CastLongToDouble(col 1) -> 4:double, CastLongToDouble(col 2) -> 5:double) -> 6:double + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:double, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, double, double, double, double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: bigint, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: (UDFToDouble(_col1) / UDFToDouble(_col2)) + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 9:double] + functionNames: [rank] + keyInputColumns: [] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [DoubleColDivideDoubleColumn(col 6, col 7)(children: CastLongToDouble(col 2) -> 6:double, CastLongToDouble(col 3) -> 7:double) -> 8:double] + outputColumns: [4, 2, 3] + outputTypes: [int, bigint, bigint] + partitionExpressions: [ConstantVectorExpression(val 0) -> 5:long] + streamingColumns: [4] + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rank_window_0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2 
Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +return_rank diff --git ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out new file mode 100644 index 0000000..a639e9f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out @@ -0,0 +1,1664 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), si (type: smallint) + sort order: ++ + 
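
One detail of the multipartitioning plan below is worth calling out: with an ORDER BY but no explicit frame, sum() gets the default frame shown as RANGE PRECEDING(MAX)~CURRENT, so rows that tie on the sort key (si) are peers and share one running-sum value — visible further down in the results, where the two "alice allen" rows at rank 6 both show 30064772191. rank(), by contrast, is a streaming evaluator (streamingColumns: [3]). A hedged sketch of the ROWS variant, which advances the sum strictly row by row even across ties:

    -- hypothetical contrast query, not part of this test suite
    select s, si,
           sum(b) over (partition by s order by si
                        rows between unbounded preceding and current row)
    from over10k;
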
Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 3, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:smallint, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: smallint), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col3, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col3: bigint, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_1 + arguments: _col3 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 1, col 2] + functionNames: [rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 1, 2, 0] + outputTypes: [int, bigint, smallint, bigint, string] + partitionExpressions: [col 0] + streamingColumns: [3] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), rank_window_0 (type: int), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 4] + Statistics: Num rows: 9085 Data 
size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s rank_window_0 sum_window_1 +alice allen 1 4294967503 +alice allen 2 8589934990 +alice allen 3 12884902428 +alice allen 4 17179869743 +alice allen 5 21474837237 +alice allen 6 30064772191 +alice allen 6 30064772191 +alice allen 8 34359739722 +alice brown 1 4294967391 +alice brown 2 8589934706 +alice brown 3 12884902122 +alice brown 4 17179869504 +alice brown 5 21474836859 +alice brown 6 25769804175 +alice brown 7 30064771680 +alice brown 8 34359739221 +alice brown 9 38654706641 +alice brown 10 42949674011 +alice brown 11 47244641313 +alice brown 12 51539608718 +alice brown 13 55834576122 +alice brown 14 60129543595 +alice carson 1 4294967446 +alice carson 2 8589934775 +alice carson 3 12884902150 +alice carson 4 17179869461 +alice carson 5 21474836824 +alice carson 6 25769804187 +alice carson 7 30064771550 +alice carson 8 34359738920 +alice carson 9 38654706240 +alice carson 10 42949673743 +alice davidson 1 4294967453 +alice davidson 2 8589934978 +alice davidson 3 12884902338 +alice davidson 4 17179869653 +alice davidson 5 21474836975 +alice davidson 6 25769804493 +alice davidson 7 30064772010 +alice davidson 8 34359739463 +alice davidson 9 38654706943 +alice davidson 10 47244641824 +alice davidson 10 47244641824 +alice davidson 12 51539609264 +alice davidson 13 55834576590 +alice davidson 14 60129544020 +alice davidson 15 64424511548 +alice davidson 16 68719479029 +alice davidson 17 73014446462 +alice davidson 18 77309413954 +alice ellison 1 4294967496 +alice ellison 2 8589934942 +alice ellison 3 12884902454 +alice ellison 4 17179869870 +alice ellison 5 21474837181 +alice ellison 6 25769804587 +alice ellison 7 30064772066 +alice ellison 8 34359739616 +alice ellison 9 38654706933 +alice ellison 10 42949674421 +alice ellison 11 47244641904 +alice ellison 12 51539609208 +alice ellison 13 55834576596 +alice ellison 14 60129544054 +alice ellison 15 64424511508 +alice falkner 1 4294967377 +alice falkner 2 8589934805 +alice falkner 3 12884902121 +alice falkner 4 17179869431 +alice falkner 5 21474836879 +alice falkner 6 25769804283 +alice falkner 7 30064771719 +alice falkner 8 38654706491 +alice falkner 8 38654706491 +alice falkner 10 42949673903 +alice falkner 11 51539608896 +alice falkner 11 51539608896 +alice falkner 13 55834576336 +alice falkner 14 60129543752 +alice falkner 15 64424511125 +alice falkner 16 68719478658 
+alice falkner 17 73014445956 +alice garcia 1 4294967303 +alice garcia 2 8589934839 +alice garcia 3 12884902276 +alice garcia 4 17179869705 +alice garcia 5 21474837050 +alice garcia 6 25769804353 +alice garcia 7 30064771681 +alice garcia 8 34359739213 +alice garcia 9 38654706564 +alice garcia 10 47244641402 +alice garcia 10 47244641402 +alice garcia 12 51539608899 +alice garcia 13 55834576425 +alice hernandez 1 4294967345 +alice hernandez 2 8589934782 +alice hernandez 3 12884902197 +alice hernandez 4 17179869695 +alice hernandez 5 21474837123 +PREHOOK: query: explain vectorization detail +select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 3913 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7, val tom allen) -> boolean, FilterStringGroupColEqualStringScalar(col 7, val bob steinbeck) -> boolean) -> boolean + predicate: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s (type: string), dec (type: decimal(4,2)) + sort order: +- + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: bigint), ts (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3, 7, 8, 9] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:decimal(4,2), VALUE._col3:bigint, VALUE._col7:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col3 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col7 (type: timestamp), KEY.reducesinkkey1 (type: decimal(4,2)) + outputColumnNames: _col3, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0, 3, 1] + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col3: bigint, _col7: string, _col8: timestamp, _col9: decimal(4,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col9 DESC NULLS LAST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col9 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 0, 3, 1] + outputTypes: [int, bigint, string, timestamp, decimal(4,2)] + partitionExpressions: [col 0] + streamingColumns: [4] + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int), _col3 (type: bigint), _col7 (type: string), _col8 (type: timestamp) + outputColumnNames: rank_window_0, _col3, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 2, 0, 3] + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: string), _col8 (type: timestamp) + sort order: +- + Map-reduce partition columns: _col7 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + value expressions: rank_window_0 (type: int), _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, VALUE._col0:int, VALUE._col4:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: 
VALUE._col0 (type: int), VALUE._col4 (type: bigint), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: timestamp) + outputColumnNames: _col0, _col4, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 0, 1] + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col4: bigint, _col8: string, _col9: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col9 DESC NULLS LAST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_1 + arguments: _col4 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 3] + functionNames: [sum] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 3, 0, 1] + outputTypes: [bigint, int, bigint, string, timestamp] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col0 (type: int), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 4] + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3912 Data size: 1017283 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s _c1 sum_window_1 +bob steinbeck 11 47244642041 +bob steinbeck 1 47244642041 +bob steinbeck 2 47244642041 +bob steinbeck 7 47244642041 +bob steinbeck 8 47244642041 +bob steinbeck 9 47244642041 +bob steinbeck 6 47244642041 +bob steinbeck 10 47244642041 +bob steinbeck 3 47244642041 +bob steinbeck 4 47244642041 +bob steinbeck 5 47244642041 +tom allen 9 81604381169 +tom allen 3 81604381169 +tom allen 7 81604381169 +tom allen 16 81604381169 +tom allen 8 81604381169 +tom allen 10 81604381169 +tom allen 15 81604381169 +tom allen 2 81604381169 +tom allen 6 81604381169 +tom allen 18 81604381169 +tom allen 1 81604381169 +tom allen 5 81604381169 +tom allen 19 81604381169 +tom allen 17 81604381169 +tom allen 11 81604381169 +tom allen 4 81604381169 +tom allen 12 81604381169 +tom 
allen 13 81604381169 +tom allen 14 81604381169 +PREHOOK: query: explain vectorization detail +select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7, val tom allen) -> boolean, FilterStringGroupColEqualStringScalar(col 7, val bob steinbeck) -> boolean) -> boolean + predicate: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s (type: string) + sort order: + + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + value expressions: si (type: smallint), i (type: int), f (type: float) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 4, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col1:smallint, VALUE._col2:int, VALUE._col4:float + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col4 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2, _col4, _col7 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 0] + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col4: float, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2, 3] + orderExpressions: [col 0] + outputColumns: [4, 1, 2, 3, 0] + outputTypes: [bigint, smallint, int, float, string] + streamingColumns: [] + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: bigint), _col1 (type: smallint), _col4 (type: float), _col7 (type: string) + outputColumnNames: sum_window_0, _col1, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 1, 3, 0] + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint) + sort order: + + Map-reduce partition columns: _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + value expressions: sum_window_0 (type: bigint), _col4 (type: float), _col7 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:bigint, VALUE._col4:float, VALUE._col7:string + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: smallint), VALUE._col4 (type: float), VALUE._col7 (type: string) + outputColumnNames: _col0, _col2, _col5, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2, 3] + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col2: smallint, _col5: float, _col8: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window 
function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2, 3] + orderExpressions: [col 0] + outputColumns: [4, 1, 0, 2, 3] + outputTypes: [double, bigint, smallint, float, string] + streamingColumns: [] + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col0 (type: bigint), sum_window_1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 4] + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9084 Data size: 1017431 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s _c1 sum_window_1 +bob steinbeck 722083 38.33000183105469 +tom allen 1248023 89.88999938964844 +tom allen 1248023 83.47000122070312 +bob steinbeck 722083 47.810001373291016 +bob steinbeck 722083 68.46999740600586 +tom allen 1248023 68.46999740600586 +bob steinbeck 722083 28.479999542236328 +tom allen 1248023 2.8499999046325684 +bob steinbeck 722083 26.290000915527344 +bob steinbeck 722083 36.209999084472656 +bob steinbeck 722083 83.52999877929688 +tom allen 1248023 39.4900016784668 +bob steinbeck 722083 80.7300033569336 +tom allen 1248023 77.77999877929688 +tom allen 1248023 26.239999771118164 +tom allen 1248023 95.41000366210938 +tom allen 1248023 81.8499984741211 +tom allen 1248023 11.300000190734863 +tom allen 1248023 55.38999938964844 +tom allen 1248023 132.82000350952148 +bob steinbeck 722083 132.82000350952148 +tom allen 1248023 47.16999816894531 +tom allen 1248023 11.069999694824219 +bob steinbeck 722083 83.52999877929688 +tom allen 1248023 19.459999084472656 +tom allen 1248023 14.510000228881836 +tom allen 1248023 38.93000030517578 +tom allen 1248023 15.84000015258789 +tom allen 1248023 52.779998779296875 +bob steinbeck 722083 9.699999809265137 +PREHOOK: query: explain vectorization detail +select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck' 
+POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7, val tom allen) -> boolean, FilterStringGroupColEqualStringScalar(col 7, val bob steinbeck) -> boolean) -> boolean + predicate: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s (type: string), bo (type: boolean) + sort order: ++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: si (type: smallint), bin (type: binary) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 6, 7, 10] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:boolean, VALUE._col1:smallint, VALUE._col8:binary + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: smallint), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey0 (type: string), VALUE._col8 (type: binary) + outputColumnNames: _col1, _col6, _col7, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0, 3] + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col6: boolean, _col7: string, _col10: binary + type: WINDOWING + Windowing table 
definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col6 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col6 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 1, 0, 3] + outputTypes: [int, smallint, boolean, string, binary] + partitionExpressions: [col 0] + streamingColumns: [4] + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int), _col1 (type: smallint), _col7 (type: string), _col10 (type: binary) + outputColumnNames: rank_window_0, _col1, _col7, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 2, 0, 3] + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col10 (type: binary) + sort order: +- + Map-reduce partition columns: _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: rank_window_0 (type: int), _col7 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:binary, VALUE._col0:int, VALUE._col7:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col7 (type: string), KEY.reducesinkkey1 (type: binary) + outputColumnNames: _col0, _col2, _col8, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0, 3, 1] + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: smallint, _col8: string, _col11: binary + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col11 DESC NULLS LAST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_1 + arguments: _col11 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + 
functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 0, 3, 1] + outputTypes: [int, int, smallint, string, binary] + partitionExpressions: [col 0] + streamingColumns: [4] + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col0 (type: int), rank_window_1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2, 4] + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4892 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s _c1 rank_window_1 +tom allen 1 1 +tom allen 1 1 +tom allen 7 1 +bob steinbeck 1 1 +bob steinbeck 5 1 +bob steinbeck 5 1 +tom allen 7 1 +tom allen 1 1 +bob steinbeck 5 1 +tom allen 1 1 +bob steinbeck 1 1 +tom allen 7 1 +tom allen 1 1 +tom allen 7 1 +bob steinbeck 5 1 +tom allen 7 1 +tom allen 7 1 +tom allen 7 1 +bob steinbeck 5 1 +tom allen 7 1 +tom allen 7 1 +tom allen 7 1 +bob steinbeck 5 1 +tom allen 7 1 +tom allen 7 1 +bob steinbeck 1 2 +bob steinbeck 5 1 +tom allen 1 1 +bob steinbeck 1 1 +tom allen 7 2 +PREHOOK: query: explain vectorization detail +select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7, val tom allen) -> boolean, 
FilterStringGroupColEqualStringScalar(col 7, val bob steinbeck) -> boolean) -> boolean + predicate: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: i (type: int) + sort order: + + Map-reduce partition columns: i (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + value expressions: f (type: float), s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 4, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col3:float, VALUE._col6:string + partitionColumnCount: 0 + scratchColumnTypeNames: double, bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col3 (type: float), VALUE._col6 (type: string) + outputColumnNames: _col2, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col4: float, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum] + functionInputExpressions: [col 1] + functionNames: [sum] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, int, float, string] + streamingColumns: [] + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: double), _col4 (type: float), _col7 (type: string) + outputColumnNames: sum_window_0, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] + 
Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int), _col4 (type: float) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: ConstantVectorExpression(val 0) -> 4:long + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + value expressions: sum_window_0 (type: double), _col7 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:float, VALUE._col0:double, VALUE._col7:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), KEY.reducesinkkey1 (type: float), VALUE._col7 (type: string) + outputColumnNames: _col0, _col5, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 3] + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: double, _col5: float, _col8: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_1 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRowNumber] + functionInputExpressions: [null] + functionNames: [row_number] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 1, 3] + outputTypes: [int, double, float, string] + partitionExpressions: [ConstantVectorExpression(val 0) -> 5:long] + streamingColumns: [4] + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col0 (type: double), row_number_window_1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2, 4] + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9420 Data size: 1017435 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck' +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s _c1 row_number_window_1 +tom allen 2.8499999046325684 1 +bob steinbeck 9.699999809265137 2 +tom allen 11.069999694824219 3 +tom allen 11.300000190734863 4 +tom allen 54.00000190734863 5 +tom allen 15.84000015258789 6 +tom allen 19.459999084472656 7 +tom allen 26.239999771118164 8 +bob steinbeck 26.290000915527344 9 +bob steinbeck 27.959999084472656 10 +bob steinbeck 28.479999542236328 11 +bob steinbeck 36.209999084472656 12 +bob steinbeck 38.33000183105469 13 +tom allen 38.93000030517578 14 +tom allen 54.00000190734863 15 +tom allen 40.5099983215332 16 +tom allen 47.16999816894531 17 +bob steinbeck 47.810001373291016 18 +tom allen 50.630001068115234 19 +tom allen 52.779998779296875 20 +tom allen 55.38999938964844 21 +tom allen 77.77999877929688 22 +bob steinbeck 80.7300033569336 23 +tom allen 81.8499984741211 24 +bob steinbeck 82.19000244140625 25 +tom allen 83.47000122070312 26 +bob steinbeck 83.52999877929688 27 +bob steinbeck 83.52999877929688 28 +tom allen 89.88999938964844 29 +tom allen 95.41000366210938 30 +PREHOOK: query: explain vectorization detail +select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 4625 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7, val tom allen) -> boolean, FilterStringGroupColEqualStringScalar(col 7, val bob steinbeck) -> boolean) -> boolean + predicate: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: s (type: string), dec (type: decimal(4,2)) + sort order: ++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + value expressions: si (type: smallint), f (type: float) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 4, 7, 9] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:decimal(4,2), VALUE._col1:smallint, VALUE._col4:float + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: smallint), VALUE._col4 (type: float), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(4,2)) + outputColumnNames: _col1, _col4, _col7, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 0, 1] + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col4: float, _col7: string, _col9: decimal(4,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col9 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col9 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 3, 0, 1] + outputTypes: [int, smallint, float, string, decimal(4,2)] + partitionExpressions: [col 0] + streamingColumns: [4] + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int), _col1 (type: smallint), _col4 (type: float), _col7 (type: string) + outputColumnNames: rank_window_0, _col1, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 2, 3, 0] + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: smallint), _col4 (type: float) + sort order: ++ + Map-reduce partition columns: _col1 (type: smallint) + Reduce 
Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + value expressions: rank_window_0 (type: int), _col7 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:float, VALUE._col0:int, VALUE._col6:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: float), VALUE._col6 (type: string) + outputColumnNames: _col0, _col2, _col5, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0, 1, 3] + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: smallint, _col5: float, _col8: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_1 + arguments: _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1] + outputColumns: [4, 2, 0, 1, 3] + outputTypes: [int, int, smallint, float, string] + partitionExpressions: [col 0] + streamingColumns: [4] + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string), _col0 (type: int), rank_window_1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 2, 4] + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4624 Data size: 1017323 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order 
by f) +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s _c1 rank_window_1 +tom allen 14 1 +tom allen 17 1 +tom allen 7 1 +bob steinbeck 1 1 +bob steinbeck 11 1 +bob steinbeck 7 1 +tom allen 12 1 +tom allen 15 1 +bob steinbeck 10 1 +tom allen 13 1 +bob steinbeck 5 1 +tom allen 11 1 +tom allen 2 1 +tom allen 9 1 +bob steinbeck 8 1 +tom allen 3 1 +tom allen 4 1 +tom allen 8 1 +bob steinbeck 3 1 +tom allen 10 1 +tom allen 18 1 +tom allen 19 1 +bob steinbeck 6 1 +tom allen 5 1 +bob steinbeck 9 1 +tom allen 6 2 +bob steinbeck 4 1 +tom allen 16 1 +tom allen 1 1 +bob steinbeck 2 2 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out new file mode 100644 index 0000000..8e52b3b --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -0,0 +1,1208 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over4_null' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over4_null' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + 
Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: i (type: int), s (type: string), b (type: bigint) + sort order: +++ + Map-reduce partition columns: i (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 3, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col2, _col3, _col7 + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col3: bigint, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS LAST, _col3 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col3 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col7 (type: string), _col3 (type: bigint), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern 
was here #### +POSTHOOK: query: select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +i s b sum_window_0 +NULL alice ichabod NULL NULL +NULL NULL NULL NULL +65534 calvin miller NULL NULL +65534 NULL NULL NULL +65536 alice ichabod 4294967441 4294967441 +65536 alice robinson 4294967476 8589934917 +65536 bob robinson 4294967349 12884902266 +65536 calvin thompson 4294967336 17179869602 +65536 david johnson 4294967490 21474837092 +65536 david laertes 4294967431 25769804523 +PREHOOK: query: explain vectorization detail +select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: d (type: double), s (type: string), f (type: float) + sort order: ++- + Map-reduce partition columns: d (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col4, _col5, _col7 + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col5: double, _col7: string + type: WINDOWING + 
Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST, _col4 DESC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: double), _col7 (type: string), _col4 (type: float), sum_window_0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +d s f sum_window_0 +NULL alice ichabod NULL NULL +NULL calvin miller NULL NULL +0.01 NULL NULL NULL +0.01 NULL NULL NULL +0.01 calvin miller 8.39 8.390000343322754 +0.02 NULL NULL NULL +0.02 holly polk 5.29 5.289999961853027 +0.02 wendy quirinius 25.5 30.789999961853027 +0.02 yuri laertes 37.59 68.38000011444092 +0.03 nick steinbeck 79.24 79.23999786376953 +PREHOOK: query: explain vectorization detail +select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), f (type: float) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: float), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col4, _col7, _col8 + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE CURRENT~FOLLOWING(MAX) + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: timestamp), _col7 (type: string), _col4 (type: float), sum_window_0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +ts s f sum_window_0 +2013-03-01 09:11:58.70307 NULL NULL 1276.850001335144 +2013-03-01 09:11:58.70307 gabriella xylophone 3.17 1276.850001335144 +2013-03-01 09:11:58.70307 calvin brown 10.89 1273.68000125885 +2013-03-01 09:11:58.70307 jessica laertes 14.54 1262.7900009155273 +2013-03-01 09:11:58.70307 yuri 
allen 14.78 1248.2500009536743 +2013-03-01 09:11:58.70307 tom johnson 17.85 1233.4700012207031 +2013-03-01 09:11:58.70307 bob ovid 20.61 1215.6200008392334 +2013-03-01 09:11:58.70307 fred nixon 28.69 1195.0100002288818 +2013-03-01 09:11:58.70307 oscar brown 29.22 1166.3199996948242 +2013-03-01 09:11:58.70307 calvin laertes 31.17 1137.1000003814697 +PREHOOK: query: explain vectorization detail +select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: t (type: tinyint), s (type: string), d (type: double) + sort order: ++- + Map-reduce partition columns: t (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: avg only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col5, _col7 + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST, _col5 DESC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + 
arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(5)~FOLLOWING(5) + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col7 (type: string), _col5 (type: double), avg_window_0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +t s d avg_window_0 +-3 alice allen 29.44 33.20166666666666 +-3 alice davidson 31.52 30.741428571428568 +-3 alice falkner 49.8 27.742499999999996 +-3 alice king 41.5 26.706666666666663 +-3 alice king 30.76 26.306999999999995 +-3 alice xylophone 16.19 24.458181818181814 +-3 bob ellison 15.98 25.029090909090908 +-3 bob falkner 6.75 24.216363636363635 +-3 bob ichabod 18.42 20.173636363636362 +-3 bob johnson 22.71 16.431818181818176 +PREHOOK: query: explain vectorization detail +select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), s (type: string) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + value expressions: i (type: int) + Execution mode: vectorized, llap + LLAP 
IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, VALUE._col2:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col2, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS LAST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 2, 1, 0] + outputTypes: [bigint, int, string, timestamp] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: timestamp), _col7 (type: string), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + Statistics: Num rows: 7069 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Offset of rows: 3 + Statistics: Num rows: 10 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, s, sum(i) over(partition by ts order by s 
nulls last) from over10k limit 10 offset 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +ts s sum_window_0 +2013-03-01 09:11:58.70307 calvin laertes 197097 +2013-03-01 09:11:58.70307 calvin steinbeck 262874 +2013-03-01 09:11:58.70307 david falkner 328506 +2013-03-01 09:11:58.70307 fred nixon 394118 +2013-03-01 09:11:58.70307 fred zipper 459719 +2013-03-01 09:11:58.70307 gabriella van buren 525334 +2013-03-01 09:11:58.70307 gabriella xylophone 591058 +2013-03-01 09:11:58.70307 jessica laertes 656771 +2013-03-01 09:11:58.70307 jessica polk 722558 +2013-03-01 09:11:58.70307 katie king 788310 +PREHOOK: query: explain vectorization detail +select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), i (type: int) + sort order: +- + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col4:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col4 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 DESC NULLS LAST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, int, double, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int), round(sum_window_0, 3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 4] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 3) -> 4:double + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +NULL 65536 0.02 +NULL 65534 0.03 +NULL NULL 0.04 +alice allen 65758 23.59 +alice allen 65720 43.98 +PREHOOK: query: explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 
+ +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), i (type: int) + sort order: +- + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: +- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col4:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col4 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 DESC NULLS LAST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, int, double, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col7 (type: string), _col2 (type: int), round((avg_window_0 / 10.0), 3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: DoubleColDivideDoubleScalar(col 3, val 10.0) -> 4:double) -> 5:double + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +NULL 65536 0.002 +NULL 65534 0.002 +NULL NULL 0.001 +alice allen 65758 2.359 +alice allen 65720 2.199 +PREHOOK: query: explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), i (type: int) + sort order: ++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: az + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col4:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col4 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS LAST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, int, double, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int), round(((avg_window_0 + 10.0) - (avg_window_0 - 10.0)), 3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 4] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 3)(children: DoubleColSubtractDoubleColumn(col 4, col 5)(children: DoubleColAddDoubleScalar(col 3, val 10.0) -> 4:double, DoubleColSubtractDoubleScalar(col 3, val 10.0) -> 5:double) -> 6:double) -> 4:double + Statistics: Num rows: 9088 Data size: 1017948 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +NULL 65534 20.0 +NULL 65536 20.0 +NULL NULL 20.0 +alice allen 65545 20.0 +alice allen 65557 20.0 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out new file mode 100644 index 0000000..0adcbb8 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out @@ -0,0 +1,2659 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select first_value(t) over ( partition by si order by i, b ) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select first_value(t) over ( partition by si order by i, b ) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 50877 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: si (type: smallint), i (type: int), b (type: bigint) + sort order: +++ + Map-reduce partition columns: si (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 50877 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: t (type: tinyint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 1, 2, 3] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, KEY.reducesinkkey2:bigint, VALUE._col0:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 0, 1, 2] + Statistics: Num rows: 50877 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col1: smallint, _col2: int, _col3: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: first_value_window_0 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongFirstValue] + functionInputExpressions: [col 3] + functionNames: [first_value] + keyInputColumns: [0, 1, 2] + native: true + nonKeyInputColumns: [3] + orderExpressions: [col 1, col 2] + outputColumns: [4, 3, 0, 1, 2] + outputTypes: [tinyint, tinyint, smallint, int, bigint] + partitionExpressions: [col 0] + streamingColumns: [4] + Statistics: Num rows: 50877 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: first_value_window_0 (type: tinyint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4] + Statistics: Num rows: 50877 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 
2000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select first_value(t) over ( partition by si order by i, b ) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select first_value(t) over ( partition by si order by i, b ) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +first_value_window_0 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +48 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +47 +PREHOOK: query: explain vectorization detail +select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: si (type: smallint), bo (type: boolean), i (type: int), f (type: float) + sort order: +++- + Map-reduce partition columns: si (type: smallint), bo (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 4, 6] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF 
operator: last_value only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey1 (type: boolean) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col4: float, _col6: boolean + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col4 DESC NULLS LAST + partition by: _col1, _col6 + raw input shape: + window functions: + window function definition + alias: last_value_window_0 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE CURRENT~CURRENT + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: last_value_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +last_value_window_0 +65543 +65549 +65558 +65580 +65586 +65596 +65616 +65620 +65627 +65640 +65643 +65706 +65713 +65737 +65744 +65752 +65778 +65540 +65563 +65599 +65604 +65613 +65613 +65615 +65651 +65653 +65668 +65693 +65731 +65733 +65738 +65741 +65744 +65747 +65763 +65778 +65789 +65541 +65547 +65560 +65572 +65574 +65575 +65578 +65588 +65594 +65610 +65691 +65694 +65711 +65719 +65722 +65738 +65756 +65790 +65542 +65557 +65566 +65584 +65610 +65612 +65626 +65631 +65638 +65654 +65654 +65655 +65699 +65712 +65720 +65732 +65748 +65752 +65771 +65771 +65771 +65781 +65565 +65569 +65573 +65582 +65584 +65606 +65656 +65669 +65717 +65724 +65728 +65761 +65762 +65770 +65771 +65781 +65546 +65551 +65551 +65568 +65568 +65579 +65603 +PREHOOK: query: explain vectorization detail +select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: si (type: smallint), bo (type: boolean), i (type: int), f (type: float) + sort order: +++- + Map-reduce partition columns: si (type: smallint), bo (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 4, 6] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: row_number only CURRENT ROW end frame is supported for RANGE + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey1 (type: boolean) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col4: float, _col6: boolean + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col4 DESC NULLS LAST + partition by: _col1, _col6 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: row_number_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +row_number_window_0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +1 +2 +3 +4 +5 +6 +7 +PREHOOK: query: explain vectorization detail +select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string) + sort order: + + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: si (type: smallint), i (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
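
The notVectorizedReason strings in the plans above ("last_value only UNBOUNDED start frame is supported", "row_number only CURRENT ROW end frame is supported for RANGE") reflect a frame check applied before the row-mode PTF operator is replaced by VectorPTFOperator. The following is a minimal sketch of that kind of guard; BoundaryKind and WindowFrame are illustrative stand-ins, not Hive's descriptor classes, and the real validation in the vectorizer is done per evaluator:

```java
// Sketch of the RANGE-frame guard implied by the notVectorizedReason strings
// in the golden plans. BoundaryKind/WindowFrame are illustrative stand-ins.
public class PtfRangeFrameGuardSketch {
  enum BoundaryKind { UNBOUNDED_PRECEDING, CURRENT_ROW, UNBOUNDED_FOLLOWING, VALUE }

  static final class WindowFrame {
    final BoundaryKind start, end;
    WindowFrame(BoundaryKind start, BoundaryKind end) { this.start = start; this.end = end; }
  }

  /** Returns null when the frame is vectorizable, else the reason to stay in row mode. */
  static String checkRangeFrame(String functionName, WindowFrame frame) {
    if (frame.start != BoundaryKind.UNBOUNDED_PRECEDING) {
      // e.g. last_value over RANGE CURRENT~CURRENT
      return functionName + " only UNBOUNDED start frame is supported";
    }
    if (frame.end != BoundaryKind.CURRENT_ROW) {
      // e.g. row_number/rank over RANGE PRECEDING(MAX)~FOLLOWING(MAX)
      return functionName + " only CURRENT ROW end frame is supported for RANGE";
    }
    return null; // e.g. avg/min/max over RANGE UNBOUNDED PRECEDING .. CURRENT ROW
  }
}
```

Under this rule the avg/min/max plans below (window frame RANGE PRECEDING(MAX)~CURRENT) vectorize with a VectorPTFOperator, while the rank and row_number RANGE PRECEDING(MAX)~FOLLOWING(MAX) plans fall back to row mode, matching the golden output.
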
rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col1:smallint, VALUE._col2:int + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: smallint), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, smallint, int, string] + streamingColumns: [] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col2 (type: int), avg_window_0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si i avg_window_0 +alice falkner 323 65669 65695.76470588235 +alice falkner 477 65722 65695.76470588235 +alice falkner 455 65718 65695.76470588235 +alice falkner 481 65709 65695.76470588235 +alice falkner 345 65773 65695.76470588235 +alice falkner 280 65597 65695.76470588235 +alice falkner 500 65775 65695.76470588235 +alice falkner 339 65785 65695.76470588235 +alice falkner 452 65596 65695.76470588235 +alice falkner 382 65690 65695.76470588235 +alice falkner 382 65622 65695.76470588235 +alice falkner 393 65611 
65695.76470588235 +alice falkner 393 65685 65695.76470588235 +alice falkner 342 65752 65695.76470588235 +alice falkner 311 65715 65695.76470588235 +alice falkner 371 65710 65695.76470588235 +alice falkner 389 65699 65695.76470588235 +alice ichabod 366 65590 65654.95454545454 +alice ichabod 458 65550 65654.95454545454 +alice ichabod 436 65738 65654.95454545454 +alice ichabod 315 65772 65654.95454545454 +alice ichabod 453 65780 65654.95454545454 +alice ichabod 347 65547 65654.95454545454 +alice ichabod 398 65659 65654.95454545454 +alice ichabod 338 65538 65654.95454545454 +alice ichabod 440 65725 65654.95454545454 +alice ichabod 320 65622 65654.95454545454 +alice ichabod 412 65718 65654.95454545454 +alice ichabod 305 65617 65654.95454545454 +alice ichabod 292 65585 65654.95454545454 +alice ichabod 303 65692 65654.95454545454 +alice ichabod 338 65545 65654.95454545454 +alice ichabod 398 65680 65654.95454545454 +alice ichabod 416 65536 65654.95454545454 +alice ichabod 344 65545 65654.95454545454 +alice ichabod 300 65704 65654.95454545454 +alice ichabod 292 65788 65654.95454545454 +alice ichabod 398 65785 65654.95454545454 +alice ichabod 301 65693 65654.95454545454 +alice polk 443 65734 65661.57142857143 +alice polk 444 65564 65661.57142857143 +alice polk 357 65550 65661.57142857143 +alice polk 321 65744 65661.57142857143 +alice polk 273 65548 65661.57142857143 +alice polk 366 65595 65661.57142857143 +alice polk 285 65761 65661.57142857143 +alice polk 466 65561 65661.57142857143 +alice polk 324 65749 65661.57142857143 +alice polk 487 65746 65661.57142857143 +alice polk 378 65598 65661.57142857143 +alice polk 395 65751 65661.57142857143 +alice polk 407 65617 65661.57142857143 +alice polk 507 65744 65661.57142857143 +alice young 468 65649 65706.63636363637 +alice young 489 65646 65706.63636363637 +alice young 286 65705 65706.63636363637 +alice young 447 65789 65706.63636363637 +alice young 425 65677 65706.63636363637 +alice young 282 65671 65706.63636363637 +alice young 351 65776 65706.63636363637 +alice young 308 65776 65706.63636363637 +alice young 314 65791 65706.63636363637 +alice young 419 65735 65706.63636363637 +alice young 383 65558 65706.63636363637 +bob falkner 394 65648 65674.17647058824 +bob falkner 414 65587 65674.17647058824 +bob falkner 389 65738 65674.17647058824 +bob falkner 302 65711 65674.17647058824 +bob falkner 390 65556 65674.17647058824 +bob falkner 357 65566 65674.17647058824 +bob falkner 264 65693 65674.17647058824 +bob falkner 329 65720 65674.17647058824 +bob falkner 317 65624 65674.17647058824 +bob falkner 258 65551 65674.17647058824 +bob falkner 410 65749 65674.17647058824 +bob falkner 330 65727 65674.17647058824 +bob falkner 474 65734 65674.17647058824 +bob falkner 260 65595 65674.17647058824 +bob falkner 459 65746 65674.17647058824 +bob falkner 406 65727 65674.17647058824 +bob falkner 291 65789 65674.17647058824 +bob garcia 422 65655 65675.86666666667 +bob garcia 279 65754 65675.86666666667 +bob garcia 466 65673 65675.86666666667 +bob garcia 416 65582 65675.86666666667 +bob garcia 418 65598 65675.86666666667 +bob garcia 344 65738 65675.86666666667 +bob garcia 320 65585 65675.86666666667 +bob garcia 315 65782 65675.86666666667 +bob garcia 444 65789 65675.86666666667 +bob garcia 354 65687 65675.86666666667 +bob garcia 480 65567 65675.86666666667 +bob garcia 332 65642 65675.86666666667 +bob garcia 361 65737 65675.86666666667 +bob garcia 398 65697 65675.86666666667 +bob garcia 421 65652 65675.86666666667 +bob laertes 423 65663 65671.23529411765 +bob laertes 303 65646 
65671.23529411765 +bob laertes 429 65591 65671.23529411765 +bob laertes 446 65602 65671.23529411765 +PREHOOK: query: explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), si (type: smallint), i (type: int) + sort order: +++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: 
_col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [1, 2, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1, col 2] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, smallint, int, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col2 (type: int), avg_window_0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si i avg_window_0 +alice allen 400 65557 65557.0 +alice allen 451 65662 65609.5 +alice allen 462 65545 65588.0 +alice allen 472 65609 65593.25 +alice allen 484 65600 65594.6 +alice allen 501 65670 65607.16666666667 +alice allen 501 65720 65623.28571428571 +alice allen 509 65758 65640.125 +alice brown 302 65711 65711.0 +alice brown 324 65569 65640.0 +alice brown 332 65781 65687.0 +alice brown 337 65707 65692.0 +alice brown 346 65696 65692.8 +alice brown 376 65708 65695.33333333333 +alice brown 381 65704 65696.57142857143 +alice brown 399 65779 65706.875 +alice brown 409 65667 65702.44444444444 +alice brown 425 65570 65689.2 +alice brown 452 65666 65687.09090909091 +alice brown 471 65733 65690.91666666667 +alice brown 492 65673 65689.53846153847 +alice brown 499 65790 65696.71428571429 +alice carson 268 65713 65713.0 +alice carson 316 65559 65636.0 +alice carson 318 65695 65655.66666666667 +alice carson 376 65576 65635.75 +alice carson 380 65785 65665.6 +alice carson 390 65747 65679.16666666667 +alice carson 404 65710 65683.57142857143 +alice carson 427 65559 65668.0 +alice carson 473 65565 65656.55555555556 +alice carson 508 65545 65645.4 +alice davidson 270 65563 65563.0 +alice davidson 272 65742 65652.5 +alice davidson 287 65747 65684.0 +alice davidson 298 65554 65651.5 
+alice davidson 308 65560 65633.2 +alice davidson 321 65677 65640.5 +alice davidson 328 65547 65627.14285714286 +alice davidson 384 65676 65633.25 +alice davidson 402 65544 65623.33333333333 +alice davidson 408 65707 65631.7 +alice davidson 408 65791 65646.18181818182 +alice davidson 423 65740 65654.0 +alice davidson 431 65677 65655.76923076923 +alice davidson 437 65690 65658.21428571429 +alice davidson 445 65590 65653.66666666667 +alice davidson 448 65641 65652.875 +alice davidson 479 65631 65651.58823529411 +alice davidson 487 65596 65648.5 +alice ellison 256 65744 65744.0 +alice ellison 274 65537 65640.5 +alice ellison 296 65741 65674.0 +alice ellison 313 65612 65658.5 +alice ellison 320 65745 65675.8 +alice ellison 331 65557 65656.0 +alice ellison 335 65730 65666.57142857143 +alice ellison 343 65787 65681.625 +alice ellison 354 65698 65683.44444444444 +alice ellison 355 65699 65685.0 +alice ellison 374 65677 65684.27272727272 +alice ellison 403 65544 65672.58333333333 +alice ellison 405 65713 65675.69230769231 +alice ellison 482 65681 65676.07142857143 +alice ellison 490 65572 65669.13333333333 +alice falkner 280 65597 65597.0 +alice falkner 311 65715 65656.0 +alice falkner 323 65669 65660.33333333333 +alice falkner 339 65785 65691.5 +alice falkner 342 65752 65703.6 +alice falkner 345 65773 65715.16666666667 +alice falkner 371 65710 65714.42857142857 +alice falkner 382 65622 65702.875 +alice falkner 382 65690 65701.44444444444 +alice falkner 389 65699 65701.2 +alice falkner 393 65611 65693.0 +alice falkner 393 65685 65692.33333333333 +alice falkner 452 65596 65684.92307692308 +alice falkner 455 65718 65687.28571428571 +alice falkner 477 65722 65689.6 +alice falkner 481 65709 65690.8125 +alice falkner 500 65775 65695.76470588235 +alice garcia 263 65630 65630.0 +alice garcia 299 65623 65626.5 +alice garcia 309 65746 65666.33333333333 +alice garcia 325 65573 65643.0 +alice garcia 331 65734 65661.2 +alice garcia 366 65744 65675.0 +alice garcia 379 65746 65685.14285714286 +alice garcia 388 65675 65683.875 +alice garcia 427 65674 65682.77777777778 +alice garcia 446 65613 65675.8 +alice garcia 446 65759 65683.36363636363 +alice garcia 459 65712 65685.75 +alice garcia 486 65725 65688.76923076923 +alice hernandez 270 65717 65717.0 +alice hernandez 290 65685 65701.0 +alice hernandez 296 65569 65657.0 +alice hernandez 320 65700 65667.75 +alice hernandez 323 65727 65679.6 +PREHOOK: query: explain vectorization detail +select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), si (type: smallint), i (type: int) + sort order: +++ + 
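
The vectorized reducers above report evaluatorClasses: [VectorPTFEvaluatorLongAvg] and a scratch double column in rowBatchContext for the window value. Conceptually, such an evaluator carries sum/count across the batches of one partition and writes the running average into the output column. Below is a simplified sketch of that shape only; the real evaluator also has to deal with RANGE peer-group semantics and tighter null bookkeeping, which are deliberately omitted here:

```java
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Simplified sketch of a streaming cumulative average over one partition:
// sum/count survive across batches, and each row receives the average of
// all rows seen so far in the partition.
public class CumulativeLongAvgSketch {
  private long sum;
  private long count;

  /** Call at each partition boundary, before the partition's first batch. */
  public void startPartition() {
    sum = 0;
    count = 0;
  }

  public void evaluateBatch(VectorizedRowBatch batch, int inputCol, int outputCol) {
    LongColumnVector in = (LongColumnVector) batch.cols[inputCol];
    DoubleColumnVector out = (DoubleColumnVector) batch.cols[outputCol];
    out.isRepeating = false;
    out.noNulls = true;
    for (int r = 0; r < batch.size; r++) {
      int i = batch.selectedInUse ? batch.selected[r] : r;
      int src = in.isRepeating ? 0 : i;
      if (in.noNulls || !in.isNull[src]) {
        sum += in.vector[src];
        count++;
      }
      if (count == 0) {            // no non-null input yet in this partition
        out.isNull[i] = true;
        out.noNulls = false;
      } else {
        out.isNull[i] = false;
        out.vector[i] = (double) sum / count;
      }
    }
  }
}
```
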
Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: min_window_0 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongMin] + functionInputExpressions: [col 2] + functionNames: [min] + keyInputColumns: [1, 2, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1, col 2] + outputColumns: [3, 1, 2, 0] + outputTypes: [int, smallint, int, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col2 (type: int), min_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si i min_window_0 +alice allen 400 65557 65557 +alice allen 451 65662 65557 +alice allen 462 65545 65545 +alice allen 472 65609 65545 +alice allen 484 65600 65545 +alice allen 501 65670 65545 +alice allen 501 65720 65545 +alice allen 509 65758 65545 +alice brown 302 65711 65711 +alice brown 324 65569 65569 +alice brown 332 65781 65569 +alice brown 337 65707 65569 +alice brown 346 65696 65569 +alice brown 376 65708 65569 +alice brown 381 65704 65569 +alice brown 399 65779 65569 +alice brown 409 65667 65569 +alice brown 425 65570 65569 +alice brown 452 65666 65569 +alice brown 471 65733 65569 +alice brown 492 65673 65569 +alice brown 499 65790 65569 +alice carson 268 65713 65713 +alice carson 316 65559 65559 +alice carson 318 65695 65559 +alice carson 376 65576 65559 +alice carson 380 65785 65559 +alice carson 390 65747 65559 +alice carson 404 65710 65559 +alice carson 427 65559 65559 +alice carson 473 65565 65559 +alice carson 508 65545 65545 +alice davidson 270 65563 65563 +alice davidson 272 65742 65563 +alice davidson 287 65747 65563 +alice davidson 298 65554 65554 +alice davidson 308 65560 65554 +alice davidson 321 65677 65554 +alice davidson 328 65547 65547 +alice davidson 384 65676 65547 +alice davidson 402 65544 65544 +alice davidson 408 65707 65544 +alice davidson 408 65791 65544 +alice davidson 423 65740 65544 +alice davidson 431 65677 65544 +alice davidson 437 65690 65544 +alice davidson 445 65590 65544 +alice davidson 448 65641 65544 +alice davidson 479 65631 65544 +alice davidson 487 65596 65544 +alice ellison 256 65744 65744 +alice ellison 274 65537 65537 +alice ellison 296 65741 65537 +alice ellison 313 65612 65537 +alice ellison 320 65745 65537 +alice ellison 331 65557 65537 +alice ellison 335 65730 65537 +alice ellison 343 65787 65537 +alice ellison 354 65698 65537 +alice ellison 355 65699 65537 +alice ellison 374 65677 65537 +alice ellison 403 65544 65537 +alice ellison 405 65713 65537 +alice ellison 482 65681 65537 +alice ellison 490 65572 65537 +alice falkner 280 65597 65597 +alice falkner 311 65715 65597 +alice falkner 323 65669 65597 +alice falkner 339 65785 65597 +alice falkner 342 65752 65597 +alice falkner 345 65773 65597 +alice falkner 371 65710 65597 +alice falkner 382 65622 65597 +alice falkner 382 65690 65597 +alice falkner 389 65699 65597 +alice falkner 393 65611 65597 +alice falkner 393 65685 65597 +alice falkner 452 65596 65596 +alice falkner 455 65718 65596 +alice falkner 477 65722 65596 +alice falkner 481 65709 65596 +alice falkner 500 65775 65596 +alice garcia 263 65630 65630 
+alice garcia 299 65623 65623 +alice garcia 309 65746 65623 +alice garcia 325 65573 65573 +alice garcia 331 65734 65573 +alice garcia 366 65744 65573 +alice garcia 379 65746 65573 +alice garcia 388 65675 65573 +alice garcia 427 65674 65573 +alice garcia 446 65613 65573 +alice garcia 446 65759 65573 +alice garcia 459 65712 65573 +alice garcia 486 65725 65573 +alice hernandez 270 65717 65717 +alice hernandez 290 65685 65685 +alice hernandez 296 65569 65569 +alice hernandez 320 65700 65569 +alice hernandez 323 65727 65569 +PREHOOK: query: explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), si (type: smallint), i (type: int) + sort order: ++- + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaz + reduceColumnSortOrder: ++- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST, _col2 DESC NULLS LAST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [1, 2, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1, col 2] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, smallint, int, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col1 (type: smallint), _col2 (type: int), avg_window_0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s si i avg_window_0 +alice allen 400 65557 65557.0 +alice allen 451 65662 65609.5 +alice allen 462 65545 65588.0 +alice allen 472 65609 65593.25 +alice allen 484 65600 65594.6 +alice allen 501 65720 65615.5 +alice allen 501 65670 65623.28571428571 +alice allen 509 65758 65640.125 +alice brown 302 65711 65711.0 +alice brown 324 65569 65640.0 +alice brown 332 65781 65687.0 +alice brown 337 65707 65692.0 +alice brown 346 65696 65692.8 +alice brown 376 65708 65695.33333333333 +alice brown 381 65704 65696.57142857143 +alice brown 399 65779 65706.875 +alice brown 409 65667 65702.44444444444 +alice brown 425 65570 65689.2 +alice brown 452 65666 65687.09090909091 +alice brown 471 65733 65690.91666666667 +alice brown 492 65673 65689.53846153847 +alice brown 499 65790 65696.71428571429 +alice carson 268 65713 65713.0 +alice carson 316 65559 65636.0 
+alice carson 318 65695 65655.66666666667 +alice carson 376 65576 65635.75 +alice carson 380 65785 65665.6 +alice carson 390 65747 65679.16666666667 +alice carson 404 65710 65683.57142857143 +alice carson 427 65559 65668.0 +alice carson 473 65565 65656.55555555556 +alice carson 508 65545 65645.4 +alice davidson 270 65563 65563.0 +alice davidson 272 65742 65652.5 +alice davidson 287 65747 65684.0 +alice davidson 298 65554 65651.5 +alice davidson 308 65560 65633.2 +alice davidson 321 65677 65640.5 +alice davidson 328 65547 65627.14285714286 +alice davidson 384 65676 65633.25 +alice davidson 402 65544 65623.33333333333 +alice davidson 408 65791 65640.1 +alice davidson 408 65707 65646.18181818182 +alice davidson 423 65740 65654.0 +alice davidson 431 65677 65655.76923076923 +alice davidson 437 65690 65658.21428571429 +alice davidson 445 65590 65653.66666666667 +alice davidson 448 65641 65652.875 +alice davidson 479 65631 65651.58823529411 +alice davidson 487 65596 65648.5 +alice ellison 256 65744 65744.0 +alice ellison 274 65537 65640.5 +alice ellison 296 65741 65674.0 +alice ellison 313 65612 65658.5 +alice ellison 320 65745 65675.8 +alice ellison 331 65557 65656.0 +alice ellison 335 65730 65666.57142857143 +alice ellison 343 65787 65681.625 +alice ellison 354 65698 65683.44444444444 +alice ellison 355 65699 65685.0 +alice ellison 374 65677 65684.27272727272 +alice ellison 403 65544 65672.58333333333 +alice ellison 405 65713 65675.69230769231 +alice ellison 482 65681 65676.07142857143 +alice ellison 490 65572 65669.13333333333 +alice falkner 280 65597 65597.0 +alice falkner 311 65715 65656.0 +alice falkner 323 65669 65660.33333333333 +alice falkner 339 65785 65691.5 +alice falkner 342 65752 65703.6 +alice falkner 345 65773 65715.16666666667 +alice falkner 371 65710 65714.42857142857 +alice falkner 382 65690 65711.375 +alice falkner 382 65622 65701.44444444444 +alice falkner 389 65699 65701.2 +alice falkner 393 65685 65699.72727272728 +alice falkner 393 65611 65692.33333333333 +alice falkner 452 65596 65684.92307692308 +alice falkner 455 65718 65687.28571428571 +alice falkner 477 65722 65689.6 +alice falkner 481 65709 65690.8125 +alice falkner 500 65775 65695.76470588235 +alice garcia 263 65630 65630.0 +alice garcia 299 65623 65626.5 +alice garcia 309 65746 65666.33333333333 +alice garcia 325 65573 65643.0 +alice garcia 331 65734 65661.2 +alice garcia 366 65744 65675.0 +alice garcia 379 65746 65685.14285714286 +alice garcia 388 65675 65683.875 +alice garcia 427 65674 65682.77777777778 +alice garcia 446 65759 65690.4 +alice garcia 446 65613 65683.36363636363 +alice garcia 459 65712 65685.75 +alice garcia 486 65725 65688.76923076923 +alice hernandez 270 65717 65717.0 +alice hernandez 290 65685 65701.0 +alice hernandez 296 65569 65657.0 +alice hernandez 320 65700 65667.75 +alice hernandez 323 65727 65679.6 +PREHOOK: query: explain vectorization detail +select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here 
#### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: si (type: smallint), bo (type: boolean), i (type: int), f (type: float) + sort order: +++- + Map-reduce partition columns: si (type: smallint), bo (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [1, 2, 4, 6] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaaz + reduceColumnSortOrder: +++- + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:boolean, KEY.reducesinkkey2:int, KEY.reducesinkkey3:float + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey1 (type: boolean) + outputColumnNames: _col1, _col2, _col4, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 1] + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: smallint, _col2: int, _col4: float, _col6: boolean + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col4 DESC NULLS LAST + partition by: _col1, _col6 + raw input shape: + window functions: + window function definition + alias: max_window_0 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongMax] + functionInputExpressions: [col 2] + functionNames: [max] + keyInputColumns: [0, 2, 3, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 2, col 3] + outputColumns: [4, 0, 2, 3, 1] + outputTypes: [int, smallint, int, float, boolean] + partitionExpressions: [col 0, col 1] + streamingColumns: [] + 
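
The streaming evaluators above (avg, min, max over PRECEDING(MAX)~CURRENT) can emit a value for every row as soon as its batch arrives. The harder case is an UNBOUNDED FOLLOWING end frame, where the aggregate is only known once the last batch of the partition key has been seen, which is why the operator wants to be told, before each key-grouped batch, whether that batch closes the current group (cf. the setNextVectorBatchGroupStatus hook this patch adds). A hedged, illustrative sketch of the buffering this enables; the generic Batch type and Backfill sink are stand-ins, not Hive classes:

```java
import java.util.ArrayList;
import java.util.List;

// Sketch: buffer the batches of one group until the group is known to be
// complete, then back-fill the finalized aggregate into every held batch.
public class GroupBufferingSketch<B> {
  interface Backfill<B> { void write(B batch, double groupResult); }

  private final List<B> buffered = new ArrayList<>();
  private double runningAggregate;
  private boolean nextBatchIsLastOfGroup = true;

  /** Delivered by the upstream source before each key-grouped batch. */
  public void setNextBatchGroupStatus(boolean isLastGroupBatch) {
    this.nextBatchIsLastOfGroup = isLastGroupBatch;
  }

  public void process(B batch, double batchContribution, Backfill<B> sink) {
    runningAggregate += batchContribution;
    if (!nextBatchIsLastOfGroup) {
      buffered.add(batch);          // result not final yet; hold the batch
      return;
    }
    for (B held : buffered) {       // group complete: back-fill held batches
      sink.write(held, runningAggregate);
    }
    sink.write(batch, runningAggregate);
    buffered.clear();
    runningAggregate = 0;
  }
}
```
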
Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col6 (type: boolean), _col2 (type: int), _col4 (type: float), max_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +si bo i f max_window_0 +256 false 65543 32.21 65543 +256 false 65549 23.72 65549 +256 false 65558 71.32 65558 +256 false 65580 64.81 65580 +256 false 65586 12.97 65586 +256 false 65596 5.35 65596 +256 false 65616 76.38 65616 +256 false 65620 51.72 65620 +256 false 65627 54.23 65627 +256 false 65640 32.64 65640 +256 false 65643 94.05 65643 +256 false 65706 83.67 65706 +256 false 65713 21.83 65713 +256 false 65737 3.38 65737 +256 false 65744 47.17 65744 +256 false 65752 61.21 65752 +256 false 65778 16.29 65778 +256 true 65540 49.44 65540 +256 true 65563 94.87 65563 +256 true 65599 89.55 65599 +256 true 65604 40.97 65604 +256 true 65613 93.29 65613 +256 true 65613 78.27 65613 +256 true 65615 20.66 65615 +256 true 65651 90.32 65651 +256 true 65653 8.1 65653 +256 true 65668 92.71 65668 +256 true 65693 62.52 65693 +256 true 65731 34.09 65731 +256 true 65733 70.53 65733 +256 true 65738 9.0 65738 +256 true 65741 54.8 65741 +256 true 65744 38.16 65744 +256 true 65747 32.18 65747 +256 true 65763 24.89 65763 +256 true 65778 74.15 65778 +256 true 65789 91.12 65789 +257 false 65541 51.26 65541 +257 false 65547 54.01 65547 +257 false 65560 42.14 65560 +257 false 65572 79.15 65572 +257 false 65574 19.96 65574 +257 false 65575 1.21 65575 +257 false 65578 61.6 65578 +257 false 65588 81.17 65588 +257 false 65594 78.39 65594 +257 false 65610 98.0 65610 +257 false 65691 80.76 65691 +257 false 65694 29.0 65694 +257 false 65711 60.88 65711 +257 false 65719 62.79 65719 +257 false 65722 79.05 65722 +257 false 65738 96.01 65738 +257 false 65756 24.44 65756 +257 false 65790 9.26 65790 +257 true 65542 62.59 65542 +257 true 65557 55.07 65557 +257 true 65566 68.54 65566 +257 true 65584 35.88 65584 +257 true 65610 47.58 65610 +257 true 65612 3.12 65612 +257 true 65626 23.18 65626 +257 true 65631 51.61 65631 +257 true 65638 95.35 65638 +257 true 65654 24.54 
65654 +257 true 65654 9.8 65654 +257 true 65655 40.42 65655 +257 true 65699 15.36 65699 +257 true 65712 90.44 65712 +257 true 65720 24.4 65720 +257 true 65732 96.85 65732 +257 true 65748 32.52 65748 +257 true 65752 49.35 65752 +257 true 65771 95.58 65771 +257 true 65771 53.89 65771 +257 true 65771 48.5 65771 +257 true 65781 17.33 65781 +258 false 65565 98.19 65565 +258 false 65569 66.81 65569 +258 false 65573 31.45 65573 +258 false 65582 67.28 65582 +258 false 65584 64.92 65584 +258 false 65606 35.52 65606 +258 false 65656 79.17 65656 +258 false 65669 75.01 65669 +258 false 65717 95.76 65717 +258 false 65724 70.0 65724 +258 false 65728 9.05 65728 +258 false 65761 33.73 65761 +258 false 65762 15.22 65762 +258 false 65770 13.38 65770 +258 false 65771 52.63 65771 +258 false 65781 1.92 65781 +258 true 65546 91.19 65546 +258 true 65551 91.56 65551 +258 true 65551 88.97 65551 +258 true 65568 81.41 65568 +258 true 65568 13.57 65568 +258 true 65579 47.52 65579 +258 true 65603 2.61 65603 +PREHOOK: query: explain vectorization detail +select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: i (type: int), bo (type: boolean), b (type: bigint) + sort order: +++ + Map-reduce partition columns: i (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 3, 6] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: rank only CURRENT ROW end frame is supported for RANGE + vectorized: false + Reduce Operator 
Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: boolean) + outputColumnNames: _col2, _col3, _col6 + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col3: bigint, _col6: boolean + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col6 ASC NULLS FIRST, _col3 ASC NULLS LAST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col6, _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: boolean), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 63596 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +bo rank_window_0 +false 1 +false 2 +false 3 +false 4 +false 5 +false 6 +false 7 +false 8 +false 9 +false 10 +false 11 +false 11 +false 13 +false 14 +false 15 +false 16 +false 17 +false 18 +false 19 +false 20 +false 20 +false 22 +true 23 +true 24 +true 25 +true 26 +true 27 +true 28 +true 29 +true 30 +true 31 +true 32 +true 33 +true 34 +true 35 +true 36 +true 37 +true 37 +true 39 +true 40 +true 41 +true 42 +true 43 +true 44 +true 45 +false 1 +false 2 +false 3 +false 4 +false 5 +false 5 +false 5 +false 8 +false 9 +false 10 +false 11 +false 12 +false 13 +false 14 +false 15 +false 16 +false 17 +true 18 +true 19 +true 20 +true 21 +true 22 +true 23 +true 24 +true 25 +true 26 +true 27 +true 27 +true 29 +true 30 +true 31 +true 32 +true 33 +true 34 +true 35 +false 1 +false 2 +false 3 +false 4 +false 4 +false 6 +false 7 +false 8 +false 9 +false 10 +false 11 +false 12 +false 13 +false 14 +false 15 +false 16 +false 17 +false 18 +true 19 +true 20 +PREHOOK: query: explain vectorization detail +select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between 
unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: i (type: int), CAST( s AS CHAR(12) (type: char(12)) + sort order: ++ + Map-reduce partition columns: i (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: CastStringGroupToChar(col 7, maxLength 12) -> 11:Char + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + scratchColumnTypeNames: string + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: rank only CURRENT ROW end frame is supported for RANGE + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col6 (type: string) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: CAST( _col7 AS CHAR(12) ASC NULLS LAST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: CAST( _col7 AS CHAR(12) + name: rank + window function: GenericUDAFRankEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( _col7 AS CHAR(12) (type: char(12)), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 10400 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 100 Data size: 10400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +_c0 rank_window_0 +alice ichabo 1 +alice robins 2 +bob robinson 3 +calvin thomp 4 +david johnso 5 +david laerte 6 +david nixon 7 +david nixon 7 +ethan johnso 9 +ethan ovid 10 +ethan underh 11 +fred miller 12 +fred miller 12 +gabriella ga 14 +gabriella un 15 +holly white 16 +irene johnso 17 +katie elliso 18 +luke allen 19 +mike quirini 20 +mike white 21 +nick davidso 22 +oscar allen 23 +oscar garcia 24 +oscar ichabo 25 +oscar ovid 26 +oscar steinb 27 +priscilla ga 28 +priscilla wh 29 +priscilla xy 30 +priscilla yo 31 +rachel brown 32 +rachel ichab 33 +rachel xylop 34 +sarah thomps 35 +sarah thomps 35 +tom johnson 37 +tom steinbec 38 +ulysses polk 39 +victor johns 40 +wendy polk 41 +xavier david 42 +yuri ellison 43 +zach allen 44 +zach hernand 45 +alice elliso 1 +bob carson 2 +calvin brown 3 +david xyloph 4 +ethan white 5 +fred johnson 6 +fred van bur 7 +gabriella ic 8 +holly laerte 9 +holly quirin 10 +jessica hern 11 +katie robins 12 +katie thomps 13 +luke nixon 14 +mike garcia 15 +mike hernand 16 +nick carson 17 +nick davidso 18 +oscar carson 19 +oscar robins 20 +priscilla wh 21 +sarah falkne 22 +sarah ichabo 23 +ulysses falk 24 +victor xylop 25 +wendy garcia 26 +wendy van bu 27 +xavier under 28 +yuri garcia 29 +yuri quirini 30 +yuri white 31 +zach falkner 32 +zach ichabod 33 +zach nixon 34 +zach ovid 35 +alice ichabo 1 +alice king 2 +alice robins 3 +calvin allen 4 +gabriella jo 5 +gabriella ni 6 +holly falkne 7 +holly hernan 8 +holly thomps 9 +katie nixon 10 +luke brown 11 +luke davidso 12 +luke white 13 +mike brown 14 +nick quirini 15 +oscar white 16 +priscilla xy 17 +quinn garcia 18 +quinn laerte 19 +rachel young 20 +PREHOOK: query: explain vectorization detail +select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k 
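
The rank plans above stay in row mode (notVectorizedReason: rank only CURRENT ROW end frame is supported for RANGE), so these golden results come from the row-mode GenericUDAFRankEvaluator. The semantics visible in the output are standard rank-with-gaps: peer rows (equal order keys) share a rank, and the next distinct key jumps to its 1-based row position, e.g. 11, 11, 13 in the boolean/bigint results. A self-contained sketch of exactly that rule:

```java
import java.util.Comparator;
import java.util.List;

// Sketch of rank-with-gaps over a partition that is already sorted by the
// order keys: peers share a rank; a new peer group takes its row position.
public final class RankSketch {
  public static <T> int[] rank(List<T> sortedPartition, Comparator<T> orderKey) {
    int[] ranks = new int[sortedPartition.size()];
    for (int i = 0; i < sortedPartition.size(); i++) {
      if (i > 0 && orderKey.compare(sortedPartition.get(i - 1), sortedPartition.get(i)) == 0) {
        ranks[i] = ranks[i - 1];    // peer row: same rank as the previous row
      } else {
        ranks[i] = i + 1;           // new peer group: rank = 1-based position
      }
    }
    return ranks;
  }
}
```

For the CHAR(12)/VARCHAR(12) variants, peers are rows whose order keys are equal after truncation to 12 characters, which is why the two "sarah thomps" rows above share rank 35 and "tom johnson " follows at 37.
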
+ Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: i (type: int), CAST( s AS varchar(12)) (type: varchar(12)) + sort order: ++ + Map-reduce partition columns: i (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyExpressions: CastStringGroupToVarChar(col 7, maxLength 12) -> 11:VarChar + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + scratchColumnTypeNames: string + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: rank only CURRENT ROW end frame is supported for RANGE + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col6 (type: string) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: CAST( _col7 AS varchar(12)) ASC NULLS LAST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: CAST( _col7 AS varchar(12)) + name: rank + window function: GenericUDAFRankEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( _col7 AS varchar(12)) (type: varchar(12)), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9784 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 10400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 10400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as 
VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +_c0 rank_window_0 +alice ichabo 1 +alice robins 2 +bob robinson 3 +calvin thomp 4 +david johnso 5 +david laerte 6 +david nixon 7 +david nixon 7 +ethan johnso 9 +ethan ovid 10 +ethan underh 11 +fred miller 12 +fred miller 12 +gabriella ga 14 +gabriella un 15 +holly white 16 +irene johnso 17 +katie elliso 18 +luke allen 19 +mike quirini 20 +mike white 21 +nick davidso 22 +oscar allen 23 +oscar garcia 24 +oscar ichabo 25 +oscar ovid 26 +oscar steinb 27 +priscilla ga 28 +priscilla wh 29 +priscilla xy 30 +priscilla yo 31 +rachel brown 32 +rachel ichab 33 +rachel xylop 34 +sarah thomps 35 +sarah thomps 35 +tom johnson 37 +tom steinbec 38 +ulysses polk 39 +victor johns 40 +wendy polk 41 +xavier david 42 +yuri ellison 43 +zach allen 44 +zach hernand 45 +alice elliso 1 +bob carson 2 +calvin brown 3 +david xyloph 4 +ethan white 5 +fred johnson 6 +fred van bur 7 +gabriella ic 8 +holly laerte 9 +holly quirin 10 +jessica hern 11 +katie robins 12 +katie thomps 13 +luke nixon 14 +mike garcia 15 +mike hernand 16 +nick carson 17 +nick davidso 18 +oscar carson 19 +oscar robins 20 +priscilla wh 21 +sarah falkne 22 +sarah ichabo 23 +ulysses falk 24 +victor xylop 25 +wendy garcia 26 +wendy van bu 27 +xavier under 28 +yuri garcia 29 +yuri quirini 30 +yuri white 31 +zach falkner 32 +zach ichabod 33 +zach nixon 34 +zach ovid 35 +alice ichabo 1 +alice king 2 +alice robins 3 +calvin allen 4 +gabriella jo 5 +gabriella ni 6 +holly falkne 7 +holly hernan 8 +holly thomps 9 +katie nixon 10 +luke brown 11 +luke davidso 12 +luke white 13 +mike brown 14 +nick quirini 15 +oscar white 16 +priscilla xy 17 +quinn garcia 18 +quinn laerte 19 +rachel young 20 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out new file mode 100644 index 0000000..c30e8eb --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out @@ -0,0 +1,1836 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was 
here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select s, rank() over (partition by f order by t) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, rank() over (partition by f order by t) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: f (type: float), t (type: tinyint) + sort order: ++ + Map-reduce partition columns: f (type: float) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 4, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:float, KEY.reducesinkkey1:tinyint, VALUE._col5:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string) + outputColumnNames: _col0, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col4: float, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col4 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 0, 2] + outputTypes: [int, tinyint, float, string] + partitionExpressions: [col 0] + streamingColumns: [3] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3] + Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, rank() over (partition by f order by t) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, rank() over (partition by f order by t) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s rank_window_0 +bob ichabod 1 +yuri thompson 2 +luke steinbeck 1 +fred zipper 2 +luke king 3 +calvin van buren 1 +quinn miller 2 +holly steinbeck 1 +david davidson 1 +calvin quirinius 1 +calvin thompson 2 +david ovid 1 +nick zipper 2 +holly thompson 3 +victor steinbeck 1 +victor robinson 2 +zach ovid 1 +ulysses zipper 1 +irene thompson 1 +luke falkner 2 +yuri johnson 1 +ulysses falkner 1 +gabriella robinson 2 +alice robinson 1 +priscilla xylophone 2 +david laertes 1 +mike underhill 2 +victor van buren 1 +holly falkner 1 +priscilla falkner 1 +luke zipper 1 +ethan ovid 2 +alice quirinius 1 +calvin white 2 +mike steinbeck 3 +nick young 1 +wendy polk 2 +irene miller 3 +ethan ellison 1 +yuri davidson 2 +zach hernandez 1 +wendy miller 1 +katie underhill 1 +irene zipper 1 +holly allen 1 +quinn brown 2 +calvin ovid 1 +zach robinson 1 +nick miller 2 +mike allen 1 +priscilla young 1 +yuri van buren 2 +zach miller 3 +sarah falkner 1 +victor xylophone 2 +rachel ichabod 1 +calvin ovid 1 +alice robinson 2 +calvin ovid 1 +alice ovid 1 +david hernandez 2 +luke laertes 3 +luke quirinius 1 +oscar white 1 +zach falkner 1 +rachel thompson 1 +priscilla king 1 +xavier polk 1 +wendy ichabod 1 +rachel ovid 1 +wendy allen 1 +luke brown 1 +oscar ichabod 2 +mike brown 3 +xavier garcia 1 +bob xylophone 1 +yuri brown 2 +ethan quirinius 1 +luke davidson 2 +zach davidson 1 +irene miller 1 +wendy king 1 +bob zipper 1 +sarah thompson 1 +bob laertes 1 +xavier allen 2 +bob carson 3 +sarah robinson 1 +david king 1 +oscar davidson 1 +wendy polk 1 +victor hernandez 2 +david ellison 1 +ulysses johnson 1 +jessica ovid 1 +bob king 1 +ulysses garcia 1 +irene falkner 1 +holly 
robinson 1 +yuri white 1 +PREHOOK: query: explain vectorization detail +select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), i (type: int), s (type: string) + sort order: ++- + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function dense_rank + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col2, _col7, _col8 + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col7 DESC NULLS LAST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: dense_rank_window_0 + arguments: _col2, _col7 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), dense_rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7066 
Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s dense_rank_window_0 +rachel thompson 1 +oscar brown 2 +wendy steinbeck 3 +victor van buren 4 +fred zipper 5 +priscilla zipper 6 +katie white 7 +fred nixon 8 +gabriella van buren 9 +luke zipper 10 +victor ellison 11 +david falkner 12 +nick carson 13 +calvin laertes 14 +yuri allen 15 +calvin brown 16 +tom johnson 17 +jessica laertes 18 +sarah falkner 19 +gabriella xylophone 20 +mike laertes 21 +bob ovid 22 +rachel garcia 23 +katie king 24 +calvin steinbeck 25 +jessica polk 26 +xavier davidson 1 +ethan ovid 2 +calvin white 3 +katie zipper 4 +quinn allen 5 +victor underhill 6 +ulysses xylophone 7 +priscilla zipper 8 +quinn ovid 9 +katie xylophone 10 +rachel ovid 11 +yuri brown 12 +oscar van buren 13 +alice miller 14 +luke thompson 15 +gabriella steinbeck 16 +priscilla brown 17 +gabriella underhill 18 +jessica robinson 19 +luke steinbeck 20 +nick ellison 21 +oscar davidson 22 +wendy johnson 23 +ulysses johnson 24 +jessica nixon 25 +fred king 26 +jessica brown 27 +ethan young 28 +xavier johnson 29 +gabriella johnson 30 +calvin nixon 31 +bob king 32 +calvin carson 33 +zach young 34 +yuri hernandez 35 +sarah van buren 36 +holly falkner 37 +jessica brown 38 +rachel ovid 39 +katie davidson 40 +bob falkner 41 +rachel young 42 +irene brown 43 +fred polk 44 +priscilla hernandez 45 +wendy thompson 46 +rachel robinson 47 +luke xylophone 48 +luke king 49 +holly thompson 50 +yuri garcia 1 +nick king 2 +calvin white 3 +rachel polk 4 +rachel davidson 5 +victor hernandez 6 +wendy miller 7 +wendy brown 8 +priscilla thompson 9 +holly nixon 10 +victor hernandez 11 +priscilla polk 12 +ethan nixon 13 +alice underhill 14 +jessica thompson 15 +tom hernandez 16 +sarah falkner 17 +wendy underhill 18 +rachel ichabod 19 +jessica johnson 20 +rachel ellison 21 +wendy falkner 22 +holly allen 23 +ulysses carson 24 +PREHOOK: query: explain vectorization detail +select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num 
rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: bo (type: boolean), b (type: bigint), s (type: string) + sort order: +++ + Map-reduce partition columns: bo (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3, 6, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col3, _col6, _col7 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col3: bigint, _col6: boolean, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST, _col7 ASC NULLS FIRST + partition by: _col6 + raw input shape: + window functions: + window function definition + alias: cume_dist_window_0 + arguments: _col3, _col7 + name: cume_dist + window function: GenericUDAFCumeDistEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), cume_dist_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s cume_dist_window_0 +calvin allen 2.0112630732099757E-4 +david ovid 4.0225261464199515E-4 +david zipper 6.033789219629927E-4 +ethan ellison 8.045052292839903E-4 +holly allen 0.001005631536604988 +irene garcia 0.0012067578439259854 +irene van buren 0.0014078841512469831 +jessica steinbeck 0.0016090104585679806 +katie xylophone 0.0018101367658889783 +mike xylophone 0.002011263073209976 +nick quirinius 0.0022123893805309734 +nick steinbeck 0.002413515687851971 +quinn steinbeck 0.002614641995172969 +rachel thompson 0.0028157683024939663 +sarah miller 0.0030168946098149637 +tom hernandez 0.003218020917135961 +ulysses ichabod 0.003419147224456959 +ulysses nixon 0.0036202735317779565 +ulysses xylophone 0.003821399839098954 +victor garcia 0.004022526146419952 +victor xylophone 0.004223652453740949 +wendy falkner 0.004424778761061947 +yuri nixon 0.004625905068382945 +bob johnson 0.004827031375703942 +bob king 0.00502815768302494 +calvin van buren 0.005229283990345938 +gabriella robinson 0.005430410297666935 +katie xylophone 0.0056315366049879325 +mike steinbeck 0.00583266291230893 +oscar quirinius 0.006033789219629927 +rachel davidson 0.006234915526950925 +sarah van buren 0.006436041834271922 +tom king 0.00663716814159292 +ulysses allen 0.006838294448913918 +wendy ellison 0.007039420756234915 +zach allen 0.007240547063555913 +zach young 0.007441673370876911 +alice falkner 0.007642799678197908 +bob ovid 0.007843925985518906 +bob underhill 0.008045052292839904 +ethan ovid 0.008246178600160902 +gabriella davidson 0.008447304907481898 +gabriella garcia 0.008648431214802896 +irene nixon 0.008849557522123894 +jessica brown 0.009050683829444892 +jessica miller 0.00925181013676589 +jessica quirinius 0.009452936444086887 +luke falkner 0.009654062751407884 +luke robinson 0.009855189058728881 +mike steinbeck 0.01005631536604988 +mike van buren 0.010257441673370877 +priscilla hernandez 0.010458567980691875 +tom polk 0.010659694288012871 +ulysses king 0.01086082059533387 +ulysses robinson 0.011061946902654867 +xavier davidson 0.011263073209975865 +alice hernandez 0.011464199517296863 +bob underhill 0.01166532582461786 +calvin nixon 0.011866452131938857 +david davidson 0.012067578439259855 +holly falkner 0.012268704746580853 +irene laertes 0.01246983105390185 +jessica robinson 0.012670957361222849 +mike falkner 0.012872083668543845 +nick falkner 0.013073209975864843 +oscar laertes 0.01327433628318584 +oscar miller 0.013475462590506838 +oscar thompson 0.013676588897827836 +priscilla nixon 0.013877715205148834 +priscilla xylophone 0.01407884151246983 +quinn miller 0.014279967819790828 +victor robinson 0.014481094127111826 +wendy allen 0.014682220434432824 +wendy nixon 0.014883346741753822 +yuri ellison 0.015084473049074818 +calvin nixon 0.015285599356395816 +fred carson 0.015486725663716814 +holly davidson 0.015687851971037812 +irene king 0.01588897827835881 +jessica davidson 0.016090104585679808 +katie polk 0.016492357200321803 +katie polk 0.016492357200321803 +luke johnson 0.0166934835076428 +nick allen 0.016894609814963796 +nick ellison 0.017095736122284794 +oscar king 0.01729686242960579 +priscilla laertes 0.01749798873692679 +priscilla underhill 0.017699115044247787 +priscilla young 0.017900241351568785 +victor steinbeck 0.018101367658889783 +wendy miller 
0.01830249396621078 +calvin carson 0.01850362027353178 +ethan hernandez 0.018704746580852777 +ethan laertes 0.01910699919549477 +ethan laertes 0.01910699919549477 +ethan white 0.019308125502815767 +fred ellison 0.019509251810136765 +gabriella hernandez 0.019710378117457763 +gabriella ovid 0.01991150442477876 +gabriella steinbeck 0.02011263073209976 +PREHOOK: query: explain vectorization detail +select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 4710 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: dec (type: decimal(4,2)), f (type: float) + sort order: ++ + Map-reduce partition columns: dec (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4710 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 7, 9] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: percent_rank not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: float), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: decimal(4,2)) + outputColumnNames: _col4, _col7, _col9 + Statistics: Num rows: 4710 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col7: string, _col9: decimal(4,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: _col9 + raw input shape: + window functions: + window function definition + alias: percent_rank_window_0 + arguments: _col4 
+ name: percent_rank + window function: GenericUDAFPercentRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4710 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), percent_rank_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4710 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 21600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 21600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s percent_rank_window_0 +wendy king 0.0 +calvin robinson 1.0 +mike steinbeck 0.0 +calvin hernandez 0.0 +sarah king 1.0 +yuri ellison 0.0 +victor king 0.0 +alice ovid 0.0 +ethan steinbeck 0.5 +mike steinbeck 1.0 +gabriella young 0.0 +jessica johnson 0.0 +holly king 0.5 +tom young 1.0 +victor falkner 0.0 +ethan polk 0.0 +oscar miller 0.0 +ethan quirinius 0.0 +fred hernandez 0.0 +david steinbeck 1.0 +wendy xylophone 0.0 +luke laertes 0.0 +alice quirinius 1.0 +calvin ovid 0.0 +holly allen 0.0 +tom brown 1.0 +wendy ovid 0.0 +mike brown 0.0 +alice polk 0.0 +alice zipper 0.0 +sarah quirinius 1.0 +luke underhill 0.0 +victor white 0.5 +holly xylophone 1.0 +oscar quirinius 0.0 +ethan davidson 0.0 +ethan allen 0.0 +wendy underhill 0.5 +irene xylophone 1.0 +ulysses steinbeck 0.0 +mike hernandez 1.0 +irene brown 0.0 +priscilla brown 0.0 +calvin johnson 1.0 +sarah xylophone 0.0 +yuri underhill 0.5 +ethan nixon 1.0 +calvin hernandez 0.0 +yuri underhill 0.0 +holly allen 1.0 +victor laertes 0.0 +ethan underhill 0.0 +irene steinbeck 1.0 +mike van buren 0.0 +xavier allen 0.5 +sarah xylophone 1.0 +luke van buren 0.0 +gabriella xylophone 0.0 +gabriella ellison 0.0 +luke falkner 0.0 +priscilla garcia 0.0 +ethan quirinius 0.3333333333333333 +alice xylophone 0.6666666666666666 +ethan underhill 1.0 +tom white 0.0 +alice johnson 0.0 +priscilla zipper 0.0 +tom laertes 0.5 +zach laertes 1.0 +xavier miller 0.0 +yuri ovid 0.0 +david steinbeck 0.0 +wendy underhill 0.0 +priscilla xylophone 0.0 +nick hernandez 0.0 +luke steinbeck 0.0 +oscar davidson 0.0 +sarah allen 0.0 +katie steinbeck 0.0 +oscar ovid 1.0 +yuri ellison 0.0 +rachel quirinius 0.0 +irene van buren 0.0 +victor ichabod 0.0 +quinn miller 0.0 +luke allen 0.0 +xavier laertes 0.0 +wendy miller 0.0 +victor brown 0.0 +tom thompson 0.0 +david brown 1.0 +zach quirinius 0.0 +oscar king 1.0 +david nixon 0.0 +ethan white 0.0 +ethan polk 0.0 +ulysses steinbeck 0.0 +victor van buren 0.3333333333333333 +sarah carson 0.6666666666666666 +priscilla nixon 1.0 +PREHOOK: query: explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select 
other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: other + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3) -> boolean + predicate: b is not null (type: boolean) + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: bigint), ts (type: timestamp), dec (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 8, 9] + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3, 8, 9] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Map 4 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3) -> boolean + predicate: b is not null (type: boolean) + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: bigint) 
+ outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2)) + sort order: ++ + Map-reduce partition columns: _col1 (type: timestamp) + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:decimal(4,2) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: decimal(4,2)) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: timestamp, _col2: decimal(4,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + 
nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 0, 1] + outputTypes: [int, timestamp, decimal(4,2)] + partitionExpressions: [col 0] + streamingColumns: [2] + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 2, val 1) -> boolean + predicate: (rank_window_0 = 1) (type: boolean) + Statistics: Num rows: 69956 Data size: 559649 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2)), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: ConstantVectorExpression(val 1) -> 3:long + Statistics: Num rows: 69956 Data size: 559649 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +ts dec rnk +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +2013-03-01 09:11:58.70307 0.50 1 +PREHOOK: query: explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: other + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3) -> boolean + predicate: b is not null (type: boolean) + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: bigint), ts (type: timestamp), dec (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 8, 9] + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6359 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3, 8, 9] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Map 4 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3) -> boolean + predicate: b is not null (type: boolean) + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: timestamp) + sort order: + + Map-reduce partition columns: _col1 (type: timestamp) + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(4,2)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:timestamp, VALUE._col1:decimal(4,2) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, decimal(4,2) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col1 (type: decimal(4,2)) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: timestamp, _col2: decimal(4,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 0] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1] + orderExpressions: [col 0] + outputColumns: [2, 0, 1] + outputTypes: [int, timestamp, decimal(4,2)] + streamingColumns: [2] + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColEqualDecimalScalar(col 1, val 89.5) -> boolean + predicate: (_col2 = 89.5) (type: boolean) + Statistics: Num rows: 69956 Data size: 559649 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: timestamp), 89.5 (type: decimal(4,2)), rank_window_0 (type: int) + outputColumnNames: 
_col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 2] + selectExpressions: ConstantVectorExpression(val 89.5) -> 3:decimal(4,2) + Statistics: Num rows: 69956 Data size: 559649 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +ts dec rnk +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +2013-03-01 09:11:58.703124 89.50 1 +PREHOOK: query: explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: other + Statistics: Num rows: 6204 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: 
FilterLongColLessLongScalar(col 0, val 10) -> boolean, SelectColumnIsNotNull(col 3) -> boolean) -> boolean + predicate: ((t < 10) and b is not null) (type: boolean) + Statistics: Num rows: 2068 Data size: 339181 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: bigint), ts (type: timestamp), dec (type: decimal(4,2)) + outputColumnNames: _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 8, 9] + Statistics: Num rows: 2068 Data size: 339181 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2068 Data size: 339181 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: timestamp), _col3 (type: decimal(4,2)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 3, 8, 9] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Map 4 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3) -> boolean + predicate: b is not null (type: boolean) + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3] + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [3] + 
dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: timestamp), _col3 (type: decimal(4,2)) + sort order: ++ + Map-reduce partition columns: _col2 (type: timestamp) + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:decimal(4,2) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: decimal(4,2)) + outputColumnNames: _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: timestamp, _col3: decimal(4,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col3 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 0, 1] + outputTypes: [int, timestamp, decimal(4,2)] + partitionExpressions: [col 0] + streamingColumns: [2] + Statistics: Num rows: 139912 Data size: 1119298 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 2, val 1) -> boolean + predicate: (rank_window_0 = 1) (type: boolean) + Statistics: Num rows: 69956 Data size: 559649 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: timestamp), _col3 (type: decimal(4,2)), 1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3] + selectExpressions: ConstantVectorExpression(val 1) -> 3:long + Statistics: Num rows: 69956 Data size: 559649 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +ts dec rnk +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 +2013-03-01 09:11:58.70307 37.30 1 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out new file mode 100644 index 0000000..b6c0a64 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out @@ -0,0 +1,1033 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root 
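The PTF Vectorization blocks in these plans (className: VectorPTFOperator, evaluatorClasses: [VectorPTFEvaluatorRank], streamingColumns: [2]) mark the rank output as a streaming column, plausibly because rank admits a one-pass computation whose carried state is constant-size: the current rank, the number of rows seen in the partition, and the previous order key. Below is a minimal self-contained sketch of that recurrence over pre-sorted arrays; it is plain Java, not Hive's evaluator or VectorizedRowBatch API.

import java.util.Arrays;

public class StreamingRankSketch {
    // Rows must arrive sorted by (partition key, order key), as the shuffle guarantees.
    public static int[] streamingRank(String[] partKeys, double[] orderKeys) {
        int n = partKeys.length;
        int[] rank = new int[n];
        String prevPart = null;
        double prevOrder = 0;
        int rowsInPartition = 0;
        int currentRank = 0;
        for (int i = 0; i < n; i++) {
            if (!partKeys[i].equals(prevPart)) {
                rowsInPartition = 0;                 // new partition: rank restarts at 1
                currentRank = 1;
            } else if (orderKeys[i] != prevOrder) {
                currentRank = rowsInPartition + 1;   // order key advanced: jump to row position
            }                                        // else: tie, rank is unchanged
            rowsInPartition++;
            rank[i] = currentRank;
            prevPart = partKeys[i];
            prevOrder = orderKeys[i];
        }
        return rank;
    }

    public static void main(String[] args) {
        // Mirrors "partition by ts order by dec" from the plans above.
        String[] ts  = {"t1", "t1", "t1", "t2", "t2"};
        double[] dec = {37.3, 37.3, 40.0, 89.5, 89.5};
        System.out.println(Arrays.toString(streamingRank(ts, dec)));   // [1, 1, 3, 1, 1]
    }
}

With no order-by at all, as in the earlier rank() over (partition by ts) query, every row of a partition ties and the rank never advances, which is why that result set shows rnk = 1 on every row.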
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 1, 0] + outputTypes: [int, string, string] + partitionExpressions: [col 0] + streamingColumns: [2] + Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), rank_window_0 
(type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.8 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string) + outputColumnNames: _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 1, 0] + outputTypes: [int, string, string] + partitionExpressions: [col 0] + streamingColumns: [2] + Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 2, val 4) -> boolean + predicate: (rank_window_0 < 4) (type: boolean) + Statistics: Num rows: 8 Data size: 3896 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +a.p_mfgr a.r +Manufacturer#1 1 +Manufacturer#1 1 +Manufacturer#1 3 +Manufacturer#2 1 +Manufacturer#2 2 +Manufacturer#2 3 +Manufacturer#3 1 +Manufacturer#3 2 +Manufacturer#3 3 +Manufacturer#4 1 +Manufacturer#4 2 +Manufacturer#4 3 +Manufacturer#5 1 +Manufacturer#5 2 +Manufacturer#5 3 +PREHOOK: query: select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +a.p_mfgr a.r +Manufacturer#1 1 +Manufacturer#1 1 
+Manufacturer#2 1 +Manufacturer#3 1 +Manufacturer#4 1 +Manufacturer#5 1 +PREHOOK: query: explain vectorization detail +select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0, val 5) -> boolean + predicate: (t < 5) (type: boolean) + Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: t (type: tinyint), f (type: float) + sort order: ++ + Map-reduce partition columns: t (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.8 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 4] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:float + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: float) + outputColumnNames: _col0, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col4: float + type: WINDOWING 
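The filtered-rank plans here differ from the unfiltered one above: the Reduce Sink is no longer native (nativeConditionsNotMet: No TopN IS false) and carries TopN Hash Memory Usage: 0.8, because a rank_window_0 < k filter lets the sender keep, per partition key, only rows whose order key is among the smallest seen so far. A rough sketch of that idea follows, with simplified String keys; it omits the buffering and memory-pressure flushing a real top-n hash needs, and because it retains exactly k rows it really models row_number(), since rank() ties would require also keeping rows equal to the k-th key.

import java.util.*;

public class TopNPushdownSketch {
    private final int k;
    private final Map<String, PriorityQueue<String>> topPerPartition = new HashMap<>();

    public TopNPushdownSketch(int k) { this.k = k; }

    // Returns whether the row can still land in the first k positions of its partition.
    public boolean offer(String partitionKey, String orderKey) {
        PriorityQueue<String> heap = topPerPartition.computeIfAbsent(
            partitionKey, key -> new PriorityQueue<>(Comparator.reverseOrder())); // max-heap
        if (heap.size() < k) {
            heap.add(orderKey);
            return true;
        }
        if (orderKey.compareTo(heap.peek()) < 0) {
            heap.poll();        // evict the largest retained order key
            heap.add(orderKey);
            return true;        // a real implementation would also retract the evicted row
        }
        return false;           // cannot beat the current top-k: safe to drop before the shuffle
    }

    public static void main(String[] args) {
        TopNPushdownSketch topN = new TopNPushdownSketch(3);   // models r < 4
        String[][] rows = {{"Manufacturer#1", "d"}, {"Manufacturer#1", "a"},
                           {"Manufacturer#1", "b"}, {"Manufacturer#1", "c"}};
        for (String[] row : rows) {
            System.out.println(row[1] + " kept=" + topN.offer(row[0], row[1]));
        }
    }
}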
+ Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col4 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 0, 1] + outputTypes: [int, tinyint, float] + partitionExpressions: [col 0] + streamingColumns: [2] + Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 2, val 6) -> boolean + predicate: (rank_window_0 < 6) (type: boolean) + Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col4 (type: float), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +a.t a.f a.r +-3 0.56 1 +-3 0.83 2 +-3 2.26 3 +-3 2.48 4 +-3 3.82 5 +-2 1.55 1 +-2 1.65 2 +-2 1.79 3 +-2 4.06 4 +-2 4.4 5 +-1 0.79 1 +-1 0.95 2 +-1 1.27 3 +-1 1.49 4 +-1 2.8 5 +0 0.08 1 +0 0.94 2 +0 1.44 3 +0 2.0 4 +0 2.12 5 +1 0.13 1 +1 0.44 2 +1 1.04 3 +1 3.41 4 +1 3.45 5 +2 2.21 1 +2 3.1 2 +2 9.93 3 +2 11.43 4 +2 15.45 5 +3 0.12 1 +3 0.19 2 +3 7.14 3 +3 7.97 4 +3 8.95 5 +4 2.26 1 +4 5.51 2 +4 5.53 3 +4 5.76 4 +4 7.26 5 +PREHOOK: query: select * +from (select t, f, row_number() over(partition by t order by f) r from over10k) a +where r < 8 and t < 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select * +from (select t, f, row_number() over(partition by t order by f) r from over10k) a +where r < 8 and t < 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +a.t a.f a.r +-3 0.56 1 +-3 0.83 2 +-3 2.26 3 +-3 2.48 4 +-3 3.82 5 +-3 6.8 6 +-3 6.83 7 +-2 1.55 1 +-2 1.65 2 +-2 1.79 3 +-2 4.06 4 +-2 4.4 5 +-2 5.43 6 +-2 5.59 7 +-1 0.79 1 +-1 0.95 2 +-1 1.27 3 +-1 1.49 4 +-1 2.8 5 +-1 4.08 6 +-1 4.31 7 +PREHOOK: 
query: explain vectorization detail +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: ctinyint (type: tinyint), cdouble (type: double) + sort order: ++ + Map-reduce partition columns: ctinyint (type: tinyint) + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.8 + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col5: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 < 5) (type: boolean) + Statistics: Num rows: 4096 Data size: 1134436 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table if exists sB +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists sB +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@sB +POSTHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sB +POSTHOOK: Lineage: sb.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: sb.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: sb.r SCRIPT [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +a.ctinyint a.cdouble a.r +PREHOOK: query: select * from sB +where ctinyint is null +PREHOOK: type: QUERY +PREHOOK: Input: default@sb +#### A masked pattern was here #### +POSTHOOK: query: select * from sB +where ctinyint is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sb +#### A masked pattern was here #### +sb.ctinyint sb.cdouble sb.r +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +PREHOOK: query: drop table if exists sD +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists sD +POSTHOOK: type: DROPTABLE +PREHOOK: query: explain vectorization detail +create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain vectorization detail +create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was 
here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Reduce Output Operator + key expressions: ctinyint (type: tinyint), cdouble (type: double) + sort order: ++ + Map-reduce partition columns: ctinyint (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false + Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.8 + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col5: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1] + functionNames: [rank] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 0, 1] + outputTypes: [int, tinyint, double] + partitionExpressions: [col 0] + streamingColumns: [2] + Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE + Filter 
Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 2, val 5) -> boolean + predicate: (rank_window_0 < 5) (type: boolean) + Statistics: Num rows: 4096 Data size: 1134436 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.sD + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: ctinyint tinyint, cdouble double, r int + field delimiter: , + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.sD + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: database:default +PREHOOK: Output: default@sD +POSTHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sD +POSTHOOK: Lineage: sd.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: sd.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: sd.r SCRIPT [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), 
(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +a.ctinyint a.cdouble a.r +PREHOOK: query: select * from sD +where ctinyint is null +PREHOOK: type: QUERY +PREHOOK: Input: default@sd +#### A masked pattern was here #### +POSTHOOK: query: select * from sD +where ctinyint is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sd +#### A masked pattern was here #### +sd.ctinyint sd.cdouble sd.r +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 +NULL NULL 1 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out new file mode 100644 index 0000000..9be1015 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out @@ -0,0 +1,2341 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization detail +select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: i (type: int), s (type: string), b (type: bigint) + sort order: +++ + Map-reduce partition columns: i (type: 
int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 3, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col2, _col3, _col7 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col3: bigint, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST, _col3 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col3 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s sum_window_0 +alice ichabod 4294967441 +alice robinson 8589934917 +bob robinson 12884902266 +calvin thompson 17179869602 +david johnson 21474837092 +david laertes 25769804523 
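In this plan the scan and shuffle stay vectorized while the PTF reducer falls back to row mode (notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type). Semantically the window is the classic cumulative case: "rows unbounded preceding" is a running sum that resets at each partition boundary, which is why the totals keep climbing until a new value of i starts a block (restarting at 4294967446, then 4294967451). A toy model with illustrative names, not Hive code; the inputs are chosen to reproduce the first few totals in this result set.

public class RunningSumSketch {
    public static long[] runningSum(int[] partKeys, long[] values) {
        long[] out = new long[values.length];
        long acc = 0;
        for (int i = 0; i < values.length; i++) {
            if (i == 0 || partKeys[i] != partKeys[i - 1]) {
                acc = 0;                     // new partition: restart the sum
            }
            acc += values[i];
            out[i] = acc;
        }
        return out;
    }

    public static void main(String[] args) {
        int[] part = {1, 1, 1, 2, 2};
        long[] b = {4294967441L, 4294967476L, 4294967349L, 4294967446L, 4294967446L};
        for (long v : runningSum(part, b)) {
            System.out.println(v);   // 4294967441, 8589934917, 12884902266, 4294967446, 8589934892
        }
    }
}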
+david nixon 30064771904 +david nixon 34359739395 +ethan johnson 38654706752 +ethan ovid 42949674180 +ethan underhill 47244641690 +fred miller 51539609102 +fred miller 55834576592 +gabriella garcia 60129544023 +gabriella underhill 64424511330 +holly white 68719478650 +irene johnson 73014446110 +katie ellison 77309413485 +luke allen 81604380948 +mike quirinius 85899348426 +mike white 90194315855 +nick davidson 94489283385 +oscar allen 98784250693 +oscar garcia 103079218190 +oscar ichabod 107374185594 +oscar ovid 111669153102 +oscar steinbeck 115964120553 +priscilla garcia 120259087901 +priscilla white 124554055390 +priscilla xylophone 128849022850 +priscilla young 133143990191 +rachel brown 137438957640 +rachel ichabod 141733924974 +rachel xylophone 146028892291 +sarah thompson 150323859590 +sarah thompson 154618826928 +tom johnson 158913794359 +tom steinbeck 163208761724 +ulysses polk 167503729208 +victor johnson 171798696592 +wendy polk 176093663918 +xavier davidson 180388631312 +yuri ellison 184683598825 +zach allen 188978566334 +zach hernandez 193273533646 +alice ellison 4294967446 +bob carson 8589934892 +calvin brown 12884902329 +david xylophone 17179869748 +ethan white 21474837241 +fred johnson 25769804704 +fred van buren 30064772167 +gabriella ichabod 34359739606 +holly laertes 38654707054 +holly quirinius 42949674584 +jessica hernandez 47244642120 +katie robinson 51539609539 +katie thompson 55834576895 +luke nixon 60129544345 +mike garcia 64424511764 +mike hernandez 68719479285 +nick carson 73014446621 +nick davidson 77309414083 +oscar carson 81604381543 +oscar robinson 85899348869 +priscilla white 90194316274 +sarah falkner 94489283722 +sarah ichabod 98784251271 +ulysses falkner 103079218819 +victor xylophone 107374186359 +wendy garcia 111669153733 +wendy van buren 115964121147 +xavier underhill 120259088561 +yuri garcia 124554056001 +yuri quirinius 128849023443 +yuri white 133143990852 +zach falkner 137438958357 +zach ichabod 141733925776 +zach nixon 146028893205 +zach ovid 150323860576 +alice ichabod 4294967451 +alice king 8589934958 +alice robinson 12884902278 +calvin allen 17179869612 +gabriella johnson 21474837108 +gabriella nixon 25769804436 +holly falkner 30064771905 +holly hernandez 34359739256 +holly thompson 38654706595 +katie nixon 42949674112 +luke brown 47244641636 +luke davidson 51539608978 +luke white 55834576299 +mike brown 60129543641 +nick quirinius 64424511126 +oscar white 68719478551 +priscilla xylophone 73014446004 +quinn garcia 77309413317 +quinn laertes 81604380656 +rachel young 85899348171 +PREHOOK: query: explain vectorization detail +select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + 
key expressions: d (type: double), s (type: string), f (type: float) + sort order: +++ + Map-reduce partition columns: d (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col4, _col5, _col7 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST, _col4 ASC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), sum_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s sum_window_0 +calvin miller 
8.390000343322754 +holly polk 5.289999961853027 +wendy quirinius 30.789999961853027 +yuri laertes 68.38000011444092 +nick steinbeck 79.23999786376953 +katie brown 60.0 +priscilla quirinius 137.83999633789062 +tom young 186.33999633789062 +gabriella quirinius 14.359999656677246 +katie falkner 65.92999935150146 +xavier robinson 153.84000301361084 +ethan carson 40.90999984741211 +victor johnson 100.0 +jessica king 92.70999908447266 +jessica white 124.16999816894531 +zach white 170.71999740600586 +holly falkner 97.3499984741211 +quinn falkner 196.23999786376953 +victor davidson 255.95999908447266 +holly young 19.110000610351562 +nick robinson 13.329999923706055 +xavier steinbeck 48.53999900817871 +irene king 30.469999313354492 +quinn zipper 90.04000091552734 +priscilla miller 15.359999656677246 +wendy zipper 92.8000020980835 +yuri miller 153.5600004196167 +zach steinbeck 9.069999694824219 +fred nixon 50.08000183105469 +katie brown 13.300000190734863 +nick davidson 87.05000305175781 +gabriella davidson 3.940000057220459 +zach carson 70.88999700546265 +holly hernandez 48.52000045776367 +jessica quirinius 90.18000030517578 +tom xylophone 166.11000061035156 +wendy king 184.76000022888184 +gabriella brown 84.83000183105469 +quinn johnson 134.9800033569336 +yuri zipper 205.75 +david robinson 64.79000091552734 +mike nixon 153.7300033569336 +gabriella white 1.4199999570846558 +rachel davidson 98.12999904155731 +yuri garcia 9.880000114440918 +yuri zipper 104.01999950408936 +alice king 85.72000122070312 +jessica steinbeck 111.41000175476074 +katie hernandez 178.9699993133545 +katie ovid 40.0 +priscilla young 101.72999954223633 +quinn davidson 196.8400001525879 +quinn van buren 279.6400032043457 +victor steinbeck 309.6400032043457 +gabriella brown 80.6500015258789 +jessica ichabod 96.54000091552734 +zach laertes 104.50000095367432 +ethan miller 49.61000061035156 +irene carson 110.68000030517578 +irene falkner 131.42000007629395 +priscilla zipper 201.39000129699707 +tom robinson 290.75000190734863 +katie polk 38.689998626708984 +nick white 96.93999862670898 +sarah davidson 99.59999871253967 +xavier laertes 161.30999779701233 +alice ichabod 32.689998626708984 +nick polk 130.97999954223633 +gabriella robinson 90.0999984741211 +luke brown 90.71999847888947 +wendy allen 116.34999763965607 +calvin ichabod 29.059999465942383 +holly steinbeck 98.4799976348877 +gabriella carson 38.09000015258789 +holly van buren 106.89999771118164 +tom nixon 191.92999649047852 +katie laertes 75.75 +mike brown 163.97000122070312 +oscar nixon 24.020000457763672 +zach garcia 101.61999893188477 +tom polk 76.98999786376953 +mike allen 96.44999694824219 +alice johnson 1.090000033378601 +holly robinson 26.209999084472656 +priscilla thompson 111.12999725341797 +yuri young 168.73999786376953 +rachel carson 80.98999786376953 +gabriella laertes 39.81999969482422 +victor brown 78.97999954223633 +bob carson 24.149999618530273 +holly allen 68.71999931335449 +fred nixon 38.04999923706055 +rachel carson 119.60000228881836 +alice nixon 49.130001068115234 +priscilla brown 123.57999801635742 +victor falkner 42.4900016784668 +david garcia 67.27999877929688 +holly hernandez 116.36999893188477 +tom white 154.0 +rachel ellison 10.600000381469727 +PREHOOK: query: explain vectorization detail +select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, sum(f) over (partition by ts order by f range between current 
row and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), f (type: float) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: s (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: float), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col4, _col7, _col8 + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE CURRENT~FOLLOWING(MAX) + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), sum_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s sum_window_0 +gabriella xylophone 1276.850001335144 +calvin brown 1273.68000125885 +jessica laertes 1262.7900009155273 +yuri allen 1248.2500009536743 +tom johnson 1233.4700012207031 +bob ovid 1215.6200008392334 +fred nixon 1195.0100002288818 +oscar brown 1166.3199996948242 +calvin laertes 1137.1000003814697 +david falkner 1105.9300003051758 +calvin steinbeck 1067.5800018310547 +katie white 1028.9700012207031 +sarah falkner 989.4900016784668 +mike laertes 948.9500007629395 +victor ellison 907.3500022888184 +luke zipper 861.2700004577637 +rachel garcia 806.9099998474121 +wendy steinbeck 749.9700012207031 +priscilla zipper 685.0100021362305 +rachel thompson 611.4900054931641 +victor van buren 532.9100036621094 +fred zipper 451.5 +gabriella van buren 366.79000091552734 +nick carson 279.36000061035156 +katie king 188.0 +jessica polk 95.04000091552734 +oscar davidson 2368.430002987385 +xavier johnson 2367.600003004074 +rachel ovid 2365.6100029945374 +xavier davidson 2361.880002975464 +nick ellison 2353.0200033187866 +jessica robinson 2342.4000034332275 +bob king 2331.0800037384033 +ulysses xylophone 2318.2500038146973 +wendy thompson 2303.550004005432 +yuri brown 2288.590003967285 +ethan ovid 2271.010004043579 +rachel robinson 2251.9100036621094 +holly falkner 2230.9000034332275 +calvin nixon 2203.950002670288 +luke thompson 2176.7200031280518 +gabriella johnson 2147.6500034332275 +jessica brown 2117.940004348755 +quinn allen 2086.100004196167 +irene brown 2054.1600036621094 +katie zipper 2018.8400039672852 +gabriella steinbeck 1981.520004272461 +priscilla brown 1943.020004272461 +zach young 1900.9400024414062 +alice miller 1856.6400032043457 +priscilla zipper 1811.9800033569336 +rachel young 1765.1400032043457 +holly thompson 1716.2500038146973 +calvin white 1666.6100044250488 +priscilla hernandez 1616.330005645752 +fred polk 1564.240005493164 +sarah van buren 1510.9800071716309 +rachel ovid 1456.890007019043 +luke xylophone 1400.4400062561035 +yuri hernandez 1343.6800079345703 +oscar van buren 1282.2700080871582 +quinn ovid 1220.390007019043 +victor underhill 1157.360008239746 +luke king 1092.8100051879883 +calvin carson 1024.1900024414062 +jessica brown 948.0600051879883 +jessica nixon 869.0100021362305 +katie davidson 788.5800018310547 +fred king 707.1699981689453 +wendy johnson 624.3199996948242 +ulysses johnson 540.3399963378906 +katie xylophone 456.12999725341797 +ethan young 370.57999420166016 +gabriella underhill 282.6499938964844 +luke steinbeck 193.7199935913086 +bob falkner 99.44999694824219 +holly allen 1607.950005441904 +rachel ichabod 1607.590005427599 +bob carson 1607.1100054383278 +wendy miller 1606.3200054168701 +nick king 1605.0500054359436 +rachel ellison 1600.5700054168701 +yuri garcia 1591.5700054168701 +victor hernandez 1568.3000049591064 +wendy 
underhill 1543.1700057983398 +alice underhill 1517.830005645752 +rachel polk 1491.9200057983398 +holly nixon 1462.910005569458 +ethan nixon 1432.4400062561035 +sarah falkner 1394.490005493164 +tom hernandez 1355.1900062561035 +rachel ichabod 1309.2800064086914 +priscilla thompson 1256.8400077819824 +jessica thompson 1202.7400093078613 +ulysses carson 1146.0400085449219 +wendy falkner 1087.2700080871582 +calvin white 1025.1800079345703 +jessica ovid 956.9800109863281 +jessica johnson 885.3000106811523 +priscilla garcia 805.8400115966797 +PREHOOK: query: explain vectorization detail +select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), s (type: string), f (type: float) + sort order: +++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: avg only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col4, _col7, _col8 + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC 
NULLS FIRST, _col4 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col4 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS CURRENT~FOLLOWING(5) + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), avg_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s avg_window_0 +bob ovid 28.053333441416424 +calvin brown 38.73666652043661 +calvin laertes 51.493333180745445 +calvin steinbeck 46.826666514078774 +david falkner 42.81499973932902 +fred nixon 52.26333347956339 +fred zipper 62.97499990463257 +gabriella van buren 55.43666664759318 +gabriella xylophone 49.925000031789146 +jessica laertes 56.32999976476034 +jessica polk 69.13333320617676 +katie king 58.16333293914795 +katie white 54.92333253224691 +luke zipper 57.83333237965902 +mike laertes 61.86999924977621 +nick carson 61.69333299001058 +oscar brown 49.44166628519694 +priscilla zipper 52.25166670481364 +rachel garcia 53.56666787465414 +rachel thompson 54.903334617614746 +sarah falkner 44.27000093460083 +tom johnson 45.01600093841553 +victor ellison 51.80750107765198 +victor van buren 53.71666749318441 +wendy steinbeck 39.869999408721924 +yuri allen 14.779999732971191 +alice miller 51.76333204905192 +bob falkner 47.50333213806152 +bob king 45.58333269755045 +calvin carson 57.253332455952965 +calvin nixon 53.441665967305504 +calvin white 53.85499922434489 +ethan ovid 51.891666094462074 +ethan young 63.52999941507975 +fred king 53.36666615804037 +fred polk 47.83166631062826 +gabriella johnson 44.84166653951009 +gabriella steinbeck 45.1966667175293 +gabriella underhill 51.95500055948893 +holly falkner 50.538333892822266 +holly thompson 47.93333371480306 +irene brown 53.22833442687988 +jessica brown 61.600001653035484 +jessica brown 62.51333491007487 +jessica nixon 60.775001525878906 +jessica robinson 63.08166758219401 +katie davidson 66.04000091552734 +katie xylophone 61.931666692097984 +katie zipper 49.44333283106486 +luke king 43.36166621247927 +luke steinbeck 42.238332599401474 +luke thompson 33.54000013073286 +luke xylophone 37.376666873693466 +nick ellison 35.72333384553591 +oscar davidson 39.27666728695234 +oscar van buren 49.643333752950035 +priscilla brown 39.95166691144308 +priscilla hernandez 42.346666733423866 +priscilla zipper 37.166666746139526 +quinn 
allen 37.50833328564962 +quinn ovid 41.199999888738 +rachel ovid 44.729999939600624 +rachel ovid 46.558333237965904 +rachel robinson 47.90833361943563 +rachel young 58.40333414077759 +sarah van buren 52.74833424886068 +ulysses johnson 45.21000083287557 +ulysses xylophone 31.506667653719585 +victor underhill 31.98666767279307 +wendy johnson 31.46333380540212 +wendy thompson 24.84999978542328 +xavier davidson 26.82799973487854 +xavier johnson 31.319999754428864 +yuri brown 41.09666633605957 +yuri hernandez 52.85499954223633 +zach young 44.29999923706055 +alice underhill 38.0366666217645 +bob carson 38.7966665327549 +calvin white 51.90833304325739 +ethan ichabod 52.48833360274633 +ethan nixon 46.103333373864494 +holly allen 40.5249999165535 +holly nixon 55.85333355267843 +jessica johnson 64.11166644096375 +jessica ovid 66.54166674613953 +jessica thompson 69.09166725476582 +nick king 68.65833353996277 +oscar carson 82.59166717529297 +priscilla garcia 80.75166702270508 +priscilla hernandez 68.91500091552734 +priscilla polk 53.32166742781798 +priscilla thompson 47.56499997278055 +quinn van buren 43.383333598574005 +rachel davidson 35.253333166241646 +rachel ellison 29.356666321555775 +rachel ichabod 37.651666397849716 +rachel ichabod 41.75999959309896 +rachel polk 49.56333351135254 +sarah falkner 59.53333377838135 +tom hernandez 63.331667264302574 +PREHOOK: query: explain vectorization detail +select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: t (type: tinyint), s (type: string), d (type: double) + sort order: ++- + Map-reduce partition columns: t (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [0, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: 
llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: avg only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col5, _col7 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: tinyint, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST, _col5 DESC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(5)~FOLLOWING(5) + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), avg_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s avg_window_0 +alice allen 33.20166666666666 +alice davidson 30.741428571428568 +alice falkner 27.742499999999996 +alice king 26.706666666666663 +alice king 26.306999999999995 +alice xylophone 24.458181818181814 +bob ellison 25.029090909090908 +bob falkner 24.216363636363635 +bob ichabod 20.173636363636362 +bob johnson 16.431818181818176 +bob polk 16.640909090909087 +bob underhill 15.266363636363632 +bob underhill 18.288181818181812 +bob van buren 18.405454545454543 +calvin ichabod 20.90363636363636 +calvin white 22.448181818181812 +david carson 24.329090909090898 +david falkner 25.01181818181817 +david garcia 22.984545454545444 +david hernandez 22.92272727272726 +ethan steinbeck 24.026363636363627 +ethan underhill 25.189090909090904 +fred ellison 27.159999999999993 +gabriella brown 25.66454545454545 +holly nixon 25.70545454545454 +holly polk 24.11818181818182 +holly steinbeck 24.49090909090909 +holly thompson 23.376363636363635 +holly underhill 19.453636363636363 +irene ellison 20.378181818181826 +irene underhill 23.510000000000012 +irene young 25.371818181818195 +jessica johnson 24.42636363636365 +jessica king 26.380000000000017 +jessica 
miller 23.99545454545456 +jessica white 26.866363636363655 +katie ichabod 28.520909090909115 +luke garcia 26.110909090909114 +luke ichabod 27.41909090909093 +luke king 28.713636363636375 +luke young 30.59181818181818 +mike allen 27.91545454545455 +mike king 25.526363636363644 +mike polk 24.774545454545464 +mike white 25.18363636363637 +mike xylophone 27.50818181818182 +nick nixon 26.225454545454546 +nick robinson 24.34454545454545 +oscar davidson 26.719090909090916 +oscar garcia 27.196363636363643 +oscar johnson 27.08272727272728 +oscar johnson 25.164545454545472 +oscar miller 28.059090909090916 +priscilla laertes 31.73727272727274 +priscilla quirinius 30.353636363636372 +priscilla zipper 27.961818181818195 +quinn ellison 29.40636363636366 +quinn polk 27.267272727272754 +rachel davidson 25.415454545454562 +rachel thompson 23.608181818181823 +sarah miller 21.49909090909091 +sarah robinson 23.40454545454546 +sarah xylophone 26.957272727272724 +sarah zipper 24.83545454545455 +tom hernandez 21.274545454545454 +tom hernandez 20.315454545454546 +tom polk 21.90181818181819 +tom steinbeck 20.772727272727273 +ulysses carson 21.647272727272718 +ulysses ellison 22.960909090909084 +ulysses quirinius 23.025454545454544 +ulysses robinson 23.762727272727282 +ulysses steinbeck 21.08909090909091 +victor allen 16.628181818181826 +victor hernandez 15.74909090909091 +victor robinson 18.193636363636355 +victor thompson 20.81181818181817 +victor xylophone 20.372727272727243 +wendy quirinius 20.81636363636362 +wendy robinson 19.936363636363634 +wendy xylophone 20.270909090909093 +xavier garcia 19.874000000000002 +xavier ovid 19.976666666666663 +yuri xylophone 21.89625000000001 +zach thompson 25.021428571428583 +zach young 27.77666666666668 +alice carson 18.785 +alice nixon 17.58142857142857 +alice underhill 17.072499999999998 +alice underhill 19.146666666666665 +alice xylophone 20.556 +bob falkner 19.116363636363637 +bob king 21.04 +bob ovid 20.854545454545452 +bob van buren 21.988181818181815 +bob xylophone 24.364545454545453 +calvin xylophone 26.91272727272727 +david falkner 27.31 +david laertes 28.00454545454545 +david miller 28.40090909090909 +PREHOOK: query: explain vectorization detail +select s, sum(i) over(partition by ts order by s) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, sum(i) over(partition by ts order by s) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), s (type: string) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: i (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 7, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:string, VALUE._col2:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col2, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1, 0] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col7: string, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 2] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 2, 1, 0] + outputTypes: [bigint, int, string, timestamp] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3] + Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select s, sum(i) over(partition by ts order by s) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(i) over(partition by ts order by s) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s sum_window_0 +bob ovid 65748 +calvin brown 131440 +calvin laertes 197097 +calvin steinbeck 262874 +david falkner 328506 +fred nixon 394118 +fred zipper 459719 +gabriella van buren 525334 +gabriella xylophone 591058 +jessica laertes 656771 +jessica polk 722558 +katie king 788310 +katie white 853920 +luke zipper 919543 +mike laertes 985277 +nick carson 1050928 +oscar brown 1116474 +priscilla zipper 1182084 +rachel garcia 1247836 +rachel thompson 1313378 +sarah falkner 1379093 +tom johnson 1444791 +victor ellison 1510421 +victor van buren 1576006 +wendy steinbeck 1641591 +yuri allen 1707256 +alice miller 65581 +bob falkner 131319 +bob king 197015 +calvin carson 262712 +calvin nixon 328407 +calvin white 393960 +ethan ovid 459504 +ethan young 525178 +fred king 590838 +fred polk 656600 +gabriella johnson 722283 +gabriella steinbeck 787886 +gabriella underhill 853497 +holly falkner 919218 +holly thompson 985000 +irene brown 1050757 +jessica brown 1182155 +jessica brown 1182155 +jessica nixon 1247815 +jessica robinson 1313437 +katie davidson 1379172 +katie xylophone 1444746 +katie zipper 1510302 +luke king 1576084 +luke steinbeck 1641724 +luke thompson 1707324 +luke xylophone 1773102 +nick ellison 1838744 +oscar davidson 1904390 +oscar van buren 1969971 +priscilla brown 2035582 +priscilla hernandez 2101353 +priscilla zipper 2166925 +quinn allen 2232487 +quinn ovid 2298060 +rachel ovid 2429366 +rachel ovid 2429366 +rachel robinson 2495140 +rachel young 2560880 +sarah van buren 2626599 +ulysses johnson 2692259 +ulysses xylophone 2757830 +victor underhill 2823401 +wendy johnson 2889058 +wendy thompson 2954831 +xavier davidson 3020367 +xavier johnson 3086050 +yuri brown 3151628 +yuri hernandez 3217338 +zach young 3283046 +alice underhill 65705 +bob carson 131461 +calvin white 197044 +ethan ichabod 262796 +ethan nixon 328501 +holly allen 394248 +holly nixon 459928 +jessica johnson 525664 +jessica ovid 591415 +jessica thompson 657122 +nick king 722691 +oscar carson 788459 +priscilla garcia 854222 +priscilla hernandez 919979 +priscilla polk 985680 +priscilla thompson 1051347 +quinn van buren 1117102 +rachel davidson 1182710 +rachel ellison 1248448 +rachel ichabod 1379923 +rachel ichabod 1379923 +rachel polk 1445518 +sarah falkner 1511234 +tom hernandez 1576947 +PREHOOK: query: explain vectorization detail +select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 23126 
Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), f (type: float) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:float + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col4, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum] + functionInputExpressions: [col 1] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1] + outputColumns: [2, 1, 0] + outputTypes: [double, float, timestamp] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: float), sum_window_0 (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + 
native: true + Statistics: Num rows: 100 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 100 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +f sum_window_0 +3.17 3.1700000762939453 +10.89 14.0600004196167 +14.54 28.600000381469727 +14.78 43.38000011444092 +17.85 61.230000495910645 +20.61 81.8400011062622 +28.69 110.53000164031982 +29.22 139.75000095367432 +31.17 170.92000102996826 +38.35 209.26999950408936 +38.61 247.88000011444092 +39.48 287.35999965667725 +40.54 327.9000005722046 +41.6 369.4999990463257 +46.08 415.58000087738037 +54.36 469.94000148773193 +56.94 526.8800001144409 +64.96 591.8399991989136 +73.52 665.35999584198 +78.58 743.9399976730347 +81.41 825.350001335144 +84.71 910.0600004196167 +87.43 997.4900007247925 +91.36 1088.850001335144 +92.96 1181.8100004196167 +95.04 1276.850001335144 +0.83 0.8299999833106995 +1.99 2.8199999928474426 +3.73 6.550000011920929 +8.86 15.409999668598175 +10.62 26.029999554157257 +11.32 37.349999248981476 +12.83 50.17999917268753 +14.7 64.87999898195267 +14.96 79.83999902009964 +17.58 97.4199989438057 +19.1 116.51999932527542 +21.01 137.52999955415726 +26.95 164.4800003170967 +27.23 191.70999985933304 +29.07 220.77999955415726 +29.71 250.4899986386299 +31.84 282.3299987912178 +31.94 314.2699993252754 +35.32 349.58999902009964 +37.32 386.90999871492386 +38.5 425.40999871492386 +42.08 467.49000054597855 +44.3 511.7899997830391 +44.66 556.4499996304512 +46.84 603.2899997830391 +48.89 652.1799991726875 +49.64 701.819998562336 +50.28 752.0999973416328 +52.09 804.1899974942207 +53.26 857.4499958157539 +54.09 911.5399959683418 +56.45 967.9899967312813 +56.76 1024.7499950528145 +61.41 1086.1599949002266 +61.88 1148.0399959683418 +63.03 1211.0699947476387 +64.55 1275.6199977993965 +68.62 1344.2400005459785 +76.13 1420.3699977993965 +79.05 1499.4200008511543 +80.43 1579.85000115633 +81.41 1661.2600048184395 +82.85 1744.1100032925606 +83.98 1828.0900066494942 +84.21 1912.3000057339668 +85.55 1997.8500087857246 +87.93 2085.7800090909004 +88.93 2174.710009396076 +94.27 2268.9800060391426 +99.45 2368.430002987385 +0.36 0.36000001430511475 +0.48 0.8400000035762787 +0.79 1.6300000250339508 +1.27 2.9000000059604645 +4.48 7.380000025033951 +9.0 16.38000002503395 +23.27 39.65000048279762 +25.13 64.77999964356422 +25.34 90.11999979615211 +25.91 116.02999964356422 +29.01 145.03999987244606 +30.47 175.50999918580055 +37.95 213.45999994874 +39.3 252.75999918580055 +45.91 298.66999903321266 +52.44 351.10999765992165 +54.1 405.20999613404274 +56.7 461.9099968969822 +58.77 520.6799973547459 +62.09 582.7699975073338 +68.2 650.9699944555759 +71.68 722.6499947607517 
+79.46 802.1099938452244 +80.02 882.1299904882908 +PREHOOK: query: explain vectorization detail +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: ts (type: timestamp), f (type: float) + sort order: ++ + Map-reduce partition columns: ts (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [4, 8] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col4, _col8 + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col4: float, _col8: timestamp + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: _col8 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col4 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(2)~PRECEDING(1) + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: float), sum_window_0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23126 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + 
Statistics: Num rows: 100 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 4400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +f sum_window_0 +3.17 NULL +10.89 3.1700000762939453 +14.54 14.0600004196167 +14.78 25.43000030517578 +17.85 29.31999969482422 +20.61 32.63000011444092 +28.69 38.46000099182129 +29.22 49.30000114440918 +31.17 57.90999984741211 +38.35 60.38999938964844 +38.61 69.51999855041504 +39.48 76.95999908447266 +40.54 78.09000015258789 +41.6 80.02000045776367 +46.08 82.13999938964844 +54.36 87.68000030517578 +56.94 100.44000244140625 +64.96 111.29999923706055 +73.52 121.89999771118164 +78.58 138.47999572753906 +81.41 152.0999984741211 +84.71 159.99000549316406 +87.43 166.12000274658203 +91.36 172.13999938964844 +92.96 178.79000091552734 +95.04 184.31999969482422 +0.83 NULL +1.99 0.8299999833106995 +3.73 2.8199999928474426 +8.86 5.7200000286102295 +10.62 12.589999675750732 +11.32 19.479999542236328 +12.83 21.9399995803833 +14.7 24.149999618530273 +14.96 27.52999973297119 +17.58 29.65999984741211 +19.1 32.53999996185303 +21.01 36.68000030517578 +26.95 40.11000061035156 +27.23 47.96000099182129 +29.07 54.18000030517578 +29.71 56.29999923706055 +31.84 58.779998779296875 +31.94 61.54999923706055 +35.32 63.78000068664551 +37.32 67.26000022888184 +38.5 72.63999938964844 +42.08 75.81999969482422 +44.3 80.58000183105469 +44.66 86.38000106811523 +46.84 88.95999908447266 +48.89 91.5 +49.64 95.72999954223633 +50.28 98.52999877929688 +52.09 99.91999816894531 +53.26 102.36999893188477 +54.09 105.3499984741211 +56.45 107.3499984741211 +56.76 110.54000091552734 +61.41 113.20999908447266 +61.88 118.16999816894531 +63.03 123.29000091552734 +64.55 124.90999984741211 +68.62 127.58000183105469 +76.13 133.17000579833984 +79.05 144.75 +80.43 155.18000030517578 +81.41 159.4800033569336 +82.85 161.84000396728516 +83.98 164.26000213623047 +84.21 166.8300018310547 +85.55 168.19000244140625 +87.93 169.76000213623047 +88.93 173.4800033569336 +94.27 176.86000061035156 +99.45 183.1999969482422 +0.36 NULL +0.48 0.36000001430511475 +0.79 0.8400000035762787 +1.27 1.270000010728836 +4.48 2.060000002384186 +9.0 5.75 +23.27 13.480000019073486 +25.13 32.27000045776367 +25.34 48.39999961853027 +25.91 50.46999931335449 +29.01 51.25 +30.47 54.920000076293945 +37.95 59.47999954223633 +39.3 68.42000007629395 +45.91 77.25 +52.44 85.20999908447266 +54.1 98.3499984741211 +56.7 106.53999710083008 +58.77 110.79999923706055 +62.09 115.47000122070312 +68.2 120.86000061035156 +71.68 130.28999710083008 +79.46 139.87999725341797 +80.02 151.13999938964844 +PREHOOK: query: explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 +PREHOOK: type: QUERY 
+POSTHOOK: query: explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), i (type: int) + sort order: ++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col4:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col4 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 
2] + functionNames: [avg] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, int, double, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int), round((avg_window_0 / 10.0), 2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 3, val 10.0) -> 4:double) -> 5:double + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 7 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 7 Data size: 784 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 784 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +alice allen 65545 2.22 +alice allen 65557 2.58 +alice allen 65600 3.38 +alice allen 65609 2.99 +alice allen 65662 2.7 +alice allen 65670 2.88 +alice allen 65720 2.76 +PREHOOK: query: explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), i (type: int) + sort order: ++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col4:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col4 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, int, double, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int), round(((avg_window_0 + 10.0) - (avg_window_0 - 10.0)), 2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 4] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2)(children: DoubleColSubtractDoubleColumn(col 4, col 5)(children: DoubleColAddDoubleScalar(col 3, val 10.0) -> 4:double, DoubleColSubtractDoubleScalar(col 3, val 10.0) -> 5:double) -> 6:double) -> 4:double + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 7 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 7 Data size: 784 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 784 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +PREHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i _c2 +alice allen 65545 20.0 +alice allen 65557 20.0 +alice allen 65600 20.0 +alice allen 65609 20.0 +alice allen 65662 20.0 +alice allen 65670 20.0 +alice allen 65720 20.0 +PREHOOK: query: explain vectorization detail +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Reduce Output Operator + key expressions: s (type: string), i (type: int) + sort order: ++ + Map-reduce partition columns: s (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + includeColumns: [2, 5, 7] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(10,0), bin:binary + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int, VALUE._col4:double + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), VALUE._col4 (type: double), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 0] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: int, _col5: double, _col7: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col5 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleAvg] + functionInputExpressions: [col 2] + functionNames: [avg] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 1, 2, 0] + outputTypes: [double, int, double, string] + partitionExpressions: [col 0] + streamingColumns: [] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 7 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 7 Data size: 784 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 784 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 7 + Processor Tree: + ListSink + +PREHOOK: query: select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +s i +alice allen 65545 +alice allen 65557 +alice allen 65600 +alice allen 65609 +alice allen 65662 +alice allen 65670 +alice allen 65720 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out 
ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out new file mode 100644 index 0000000..66e5939 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out @@ -0,0 +1,220 @@ +PREHOOK: query: drop table if exists smalltable_windowing +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists smalltable_windowing +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table smalltable_windowing( + i int, + type string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@smalltable_windowing +POSTHOOK: query: create table smalltable_windowing( + i int, + type string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@smalltable_windowing +PREHOOK: query: insert into smalltable_windowing values(3, 'a'), (1, 'a'), (2, 'a') +PREHOOK: type: QUERY +PREHOOK: Output: default@smalltable_windowing +POSTHOOK: query: insert into smalltable_windowing values(3, 'a'), (1, 'a'), (2, 'a') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@smalltable_windowing +POSTHOOK: Lineage: smalltable_windowing.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: smalltable_windowing.type SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: explain vectorization detail +select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: smalltable_windowing + Statistics: Num rows: 3 Data size: 9 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Reduce Output Operator + key expressions: type (type: string), i (type: int) + sort order: ++ + Map-reduce partition columns: type (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: i:int, type:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: PTF Output Columns expression for PTF operator: Data type array<int> of column collect_set_window_6 not supported + vectorized: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: max_window_0 + arguments: _col0 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: min_window_1 + arguments: _col0 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: first_value_window_2 + arguments: _col0 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: last_value_window_3 + arguments: _col0 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: avg_window_4 + arguments: _col0 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: sum_window_5 + arguments: _col0 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: collect_set_window_6 + arguments: _col0 + name: collect_set + window function: GenericUDAFMkCollectionEvaluator + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + window function definition + alias: count_window_7 + arguments: _col0 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(1)~FOLLOWING(7) + Statistics: Num rows: 3 Data size: 9 Basic stats:
COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: int), max_window_0 (type: int), min_window_1 (type: int), first_value_window_2 (type: int), last_value_window_3 (type: int), avg_window_4 (type: double), sum_window_5 (type: bigint), collect_set_window_6 (type: array<int>), count_window_7 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing +PREHOOK: type: QUERY +PREHOOK: Input: default@smalltable_windowing +#### A masked pattern was here #### +POSTHOOK: query: select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing +POSTHOOK: type: QUERY +POSTHOOK: Input: default@smalltable_windowing +#### A masked pattern was here #### +type i max_window_0 min_window_1 first_value_window_2 last_value_window_3 avg_window_4 sum_window_5 collect_set_window_6 count_window_7 +a 1 3 1 1 3 2.0 6 [1,2,3] 3 +a 2 3 1 1 3 2.0 6 [1,2,3] 3 +a 3 3 2 2 3 2.5 5 [2,3] 2
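The notVectorizedReason in the plan above is worth a note: the new VectorPTFOperator handles only primitive output types, so the array<int> produced by collect_set keeps this reducer in row mode even though every other window function in the query is supported. A rough sketch of that kind of output-type gate follows; the class and method names are illustrative, not Hive's actual Vectorizer code.

import java.util.List;

// Illustrative sketch only (not Hive source): vectorization is decided per
// operator, so a single non-primitive output column is enough to keep the
// whole PTF operator in row mode, because only primitive types map onto
// ColumnVector subclasses (LongColumnVector, DoubleColumnVector,
// BytesColumnVector, ...).
final class PtfOutputTypeCheck {
  static String firstUnsupportedOutput(List<String> columnNames, List<String> typeNames) {
    for (int i = 0; i < typeNames.size(); i++) {
      String typeName = typeNames.get(i);
      if (typeName.startsWith("array") || typeName.startsWith("map")
          || typeName.startsWith("struct") || typeName.startsWith("uniontype")) {
        // Mirrors the plan's message, e.g.
        // "Data type array<int> of column collect_set_window_6 not supported".
        return "Data type " + typeName + " of column " + columnNames.get(i) + " not supported";
      }
    }
    return null; // all outputs are vectorizable as far as this check goes
  }
}

diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index c38a215..d7d83a2 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -457,26 +457,26 @@ POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +-59 +-58 +-54 +-50 NULL +-60 +-56 +-49 +-46 -64 -63 -62 -61 --60 --59 --58 --57 --56 -55 --54 -53 -52 -51 --50 --49 +-57 -48 -47 --46 PREHOOK: query: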
explain vectorization expression select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20 PREHOOK: type: QUERY @@ -696,37 +696,29 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: ctinyint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cdouble (type: double), ctinyint (type: tinyint) - outputColumnNames: cdouble, ctinyint - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [5, 0] - Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(ctinyint) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 5 - native: false - projectedOutputColumns: [0] - keys: cdouble (type: double) - mode: hash - outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 5 + native: false + projectedOutputColumns: [0] + keys: cdouble (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3185 Data size: 44512 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
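The vectorized_ptf.q.out changes that follow are mostly diagnostic: the catch-all "PTF Operator (PTF) not supported" reason is replaced by the first concrete blocker the validator hits (unsupported function, more than one argument, unsupported frame), and every window frame is now printed with its ROWS/RANGE type. A hedged sketch of what such a reason-reporting check could look like is below; the method and its signature are hypothetical, not the actual Hive Vectorizer API.

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch (not Hive's Vectorizer): report the first concrete
// blocker instead of one generic "not supported" message. The message texts
// intentionally mirror the notVectorizedReason strings in the plans below.
final class PtfVectorizationCheck {
  static final List<String> SUPPORTED = Arrays.asList("avg", "count",
      "dense_rank", "first_value", "last_value", "max", "min", "rank",
      "row_number", "sum");

  static String firstBlocker(String functionName, int argCount,
      boolean isRowsFrame, boolean frameEndIsUnbounded) {
    if (!SUPPORTED.contains(functionName)) {
      return functionName + " not in supported functions " + SUPPORTED;
    }
    if (argCount > 1) {
      return "More than 1 argument expression of aggregation function " + functionName;
    }
    if (isRowsFrame && frameEndIsUnbounded) {
      return functionName + " UNBOUNDED end frame is not supported for ROWS window type";
    }
    return null; // vectorizable as far as this sketch checks
  }
}

diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index df4b0d8..021d7fd 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -179,7 +179,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -211,7 +211,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true,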
hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -236,21 +236,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -451,7 +451,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -483,7 +483,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -508,7 +508,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -639,7 +639,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -793,7 +793,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -825,7 +825,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -850,21 +850,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: 
GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -1007,7 +1007,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1039,7 +1039,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1064,21 +1064,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1224,7 +1224,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1264,7 +1264,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Group By Operator @@ -1290,21 +1290,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function 
definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1482,7 +1482,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1711,7 +1711,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1862,7 +1862,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1894,7 +1894,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank vectorized: false Reduce Operator Tree: Select Operator @@ -1919,7 +1919,7 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2057,7 +2057,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2090,7 +2090,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2115,21 +2115,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) 
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2268,7 +2268,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2300,7 +2300,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2325,21 +2325,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2481,7 +2481,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2536,7 +2536,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2576,7 +2576,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2601,21 +2601,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2764,7 +2764,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2796,7 +2796,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported vectorized: false Reduce Operator Tree: Select Operator @@ -2821,13 +2821,13 @@ STAGE PLANS: arguments: _col5 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double) @@ -3020,7 +3020,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3072,7 +3072,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -3097,33 +3097,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE 
PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -3265,7 +3265,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3496,7 +3496,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -3534,7 +3534,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported vectorized: false Reduce Operator Tree: Select Operator @@ -3559,7 +3559,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~CURRENT + window frame: ROWS PRECEDING(2)~CURRENT Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double) @@ -3752,7 +3752,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3790,7 +3790,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -3815,21 +3815,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -3848,7 +3848,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported vectorized: false Reduce Operator Tree: Select Operator @@ -3873,7 +3873,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(5)~CURRENT + window frame: RANGE PRECEDING(5)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sum_window_0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) @@ -3890,7 +3890,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank vectorized: false Reduce Operator Tree: Select Operator @@ -3915,28 +3915,28 @@ STAGE PLANS: arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) @@ -4188,7 +4188,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4250,7 +4250,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4290,7 +4290,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank vectorized: false Reduce Operator Tree: Select Operator @@ -4315,21 +4315,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4502,7 +4502,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4541,7 +4541,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4573,7 +4573,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4605,7 +4605,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -4630,21 +4630,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: 
string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4812,7 +4812,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4851,7 +4851,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4886,16 +4886,29 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -4915,28 +4928,48 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 1, col 1, col 2] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 1, 0, 2] + outputTypes: [int, int, bigint, string, string, int] + partitionExpressions: [col 0] + streamingColumns: [3, 4] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3, 4, 2, 5] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5098,7 +5131,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5137,7 +5170,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5185,7 +5218,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5218,7 +5251,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank vectorized: false Reduce Operator Tree: Select Operator @@ -5243,21 +5276,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -5427,7 +5460,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator
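Worth calling out in the newly vectorized Reducer 4 above: the PTF Vectorization block lists only the rank and dense_rank outputs (columns 3 and 4) under streamingColumns. Rank depends only on rows already seen in the partition, so its value is final the moment each row is processed and the batch can be forwarded immediately; the RANGE-framed running sum gives every order-by peer the same value, so it cannot be finalized until the whole peer group has been read. A minimal sketch of a streaming rank over one batch's order-by column follows; the class is illustrative, not Hive's VectorPTFEvaluatorRank.

// Hypothetical illustration (not Hive source): streaming rank over the
// order-by key column of successive batches. The output is final as soon as
// each row is seen, which is what makes it safe to list under
// streamingColumns. Partition-boundary reset is omitted for brevity.
final class StreamingRank {
  private long rowNumber = 0;
  private long rank = 0;
  private long prevKey;
  private boolean first = true;

  // orderKeys: the order-by column of one batch; rankOut: the output column.
  void evaluateBatch(long[] orderKeys, long[] rankOut, int size) {
    for (int i = 0; i < size; i++) {
      rowNumber++;
      if (first || orderKeys[i] != prevKey) {
        rank = rowNumber;   // new peer group: rank jumps to the row number
        prevKey = orderKeys[i];
        first = false;
      }
      rankOut[i] = rank;    // final immediately, so the batch can be forwarded
    }
  }
}

@@ -5482,7 +5515,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported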
vectorized: false Reduce Operator Tree: Select Operator @@ -5522,7 +5555,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank vectorized: false Reduce Operator Tree: Select Operator @@ -5547,21 +5580,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) @@ -5725,7 +5758,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5780,7 +5813,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5809,16 +5842,29 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -5838,28 +5884,48 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLongSum] + functionInputExpressions: [col 1, col 1, col 2] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1] + outputColumns: [3, 4, 5, 1, 0, 2] + outputTypes: [int, int, bigint, string, string, int] + partitionExpressions: [col 0] + streamingColumns: [3, 4] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3, 4, 2, 5, 5] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/windowing.q.out ql/src/test/results/clientpositive/llap/windowing.q.out index 468b67e..713fc3b 100644 --- ql/src/test/results/clientpositive/llap/windowing.q.out +++ ql/src/test/results/clientpositive/llap/windowing.q.out @@ -1868,7 +1868,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/windowing_windowspec.q.out new file mode 100644 index 0000000..9da6183 --- /dev/null +++ ql/src/test/results/clientpositive/llap/windowing_windowspec.q.out @@ -0,0 +1,955 @@ +PREHOOK: query: drop table over10k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table over10k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@over10k +PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice ichabod 4294967441 +alice robinson 8589934917 +bob robinson 12884902266 +calvin thompson 17179869602 +david johnson 21474837092 +david laertes 25769804523 +david nixon 30064771904 +david nixon 34359739395 +ethan johnson 38654706752 +ethan ovid 42949674180 +ethan underhill 47244641690 +fred miller 51539609102 +fred miller 55834576592 +gabriella garcia 60129544023 +gabriella underhill 64424511330 +holly white 68719478650 +irene johnson 73014446110 +katie ellison 77309413485 +luke allen 81604380948 +mike quirinius 85899348426 +mike white 90194315855 +nick davidson 94489283385 +oscar allen 98784250693 +oscar garcia 103079218190 +oscar ichabod 107374185594 +oscar ovid 111669153102 +oscar steinbeck 115964120553 +priscilla garcia 120259087901 +priscilla white 124554055390 +priscilla xylophone 128849022850 +priscilla young 133143990191 +rachel brown 137438957640 +rachel ichabod 141733924974 +rachel xylophone 146028892291 +sarah thompson 150323859590 +sarah thompson 154618826928 +tom johnson 158913794359 +tom steinbeck 163208761724 +ulysses polk 167503729208 +victor johnson 171798696592 +wendy polk 176093663918 +xavier davidson 180388631312 +yuri ellison 184683598825 +zach allen 188978566334 +zach hernandez 193273533646 +alice ellison 4294967446 +bob carson 8589934892 +calvin brown 12884902329 +david xylophone 17179869748 +ethan white 21474837241 +fred johnson 25769804704 +fred van buren 30064772167 +gabriella ichabod 34359739606 +holly laertes 38654707054 +holly quirinius 42949674584 +jessica hernandez 47244642120 +katie robinson 51539609539 +katie thompson 55834576895 +luke nixon 60129544345 +mike garcia 64424511764 +mike hernandez 68719479285 +nick carson 73014446621 +nick davidson 77309414083 +oscar carson 81604381543 +oscar robinson 85899348869 +priscilla white 90194316274 +sarah falkner 94489283722 +sarah ichabod 98784251271 +ulysses falkner 103079218819 +victor xylophone 107374186359 +wendy garcia 111669153733 +wendy van buren 115964121147 +xavier underhill 120259088561 +yuri garcia 124554056001 +yuri quirinius 128849023443 +yuri white 133143990852 +zach falkner 137438958357 +zach ichabod 141733925776 +zach nixon 146028893205 +zach ovid 150323860576 +alice ichabod 4294967451 +alice king 8589934958 +alice robinson 12884902278 +calvin allen 17179869612 +gabriella johnson 21474837108 +gabriella nixon 25769804436 +holly falkner 30064771905 +holly hernandez 34359739256 +holly thompson 38654706595 +katie nixon 42949674112 +luke brown 47244641636 +luke davidson 51539608978 +luke white 55834576299 +mike brown 60129543641 +nick quirinius 64424511126 +oscar white 68719478551 +priscilla xylophone 73014446004 +quinn garcia 77309413317 +quinn laertes 81604380656 +rachel young 85899348171 +PREHOOK: query: select s, 
sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +calvin miller 8.390000343322754 +holly polk 5.289999961853027 +wendy quirinius 30.789999961853027 +yuri laertes 68.38000011444092 +nick steinbeck 79.23999786376953 +katie brown 60.0 +priscilla quirinius 137.83999633789062 +tom young 186.33999633789062 +gabriella quirinius 14.359999656677246 +katie falkner 65.92999935150146 +xavier robinson 153.84000301361084 +ethan carson 40.90999984741211 +victor johnson 100.0 +jessica king 92.70999908447266 +jessica white 124.16999816894531 +zach white 170.71999740600586 +holly falkner 97.3499984741211 +quinn falkner 196.23999786376953 +victor davidson 255.95999908447266 +holly young 19.110000610351562 +nick robinson 13.329999923706055 +xavier steinbeck 48.53999900817871 +irene king 30.469999313354492 +quinn zipper 90.04000091552734 +priscilla miller 15.359999656677246 +wendy zipper 92.8000020980835 +yuri miller 153.5600004196167 +zach steinbeck 9.069999694824219 +fred nixon 50.08000183105469 +katie brown 13.300000190734863 +nick davidson 87.05000305175781 +gabriella davidson 3.940000057220459 +zach carson 70.88999700546265 +holly hernandez 48.52000045776367 +jessica quirinius 90.18000030517578 +tom xylophone 166.11000061035156 +wendy king 184.76000022888184 +gabriella brown 84.83000183105469 +quinn johnson 134.9800033569336 +yuri zipper 205.75 +david robinson 64.79000091552734 +mike nixon 153.7300033569336 +gabriella white 1.4199999570846558 +rachel davidson 98.12999904155731 +yuri garcia 9.880000114440918 +yuri zipper 104.01999950408936 +alice king 85.72000122070312 +jessica steinbeck 111.41000175476074 +katie hernandez 178.9699993133545 +katie ovid 40.0 +priscilla young 101.72999954223633 +quinn davidson 196.8400001525879 +quinn van buren 279.6400032043457 +victor steinbeck 309.6400032043457 +gabriella brown 80.6500015258789 +jessica ichabod 96.54000091552734 +zach laertes 104.50000095367432 +ethan miller 49.61000061035156 +irene carson 110.68000030517578 +irene falkner 131.42000007629395 +priscilla zipper 201.39000129699707 +tom robinson 290.75000190734863 +katie polk 38.689998626708984 +nick white 96.93999862670898 +sarah davidson 99.59999871253967 +xavier laertes 161.30999779701233 +alice ichabod 32.689998626708984 +nick polk 130.97999954223633 +gabriella robinson 90.0999984741211 +luke brown 90.71999847888947 +wendy allen 116.34999763965607 +calvin ichabod 29.059999465942383 +holly steinbeck 98.4799976348877 +gabriella carson 38.09000015258789 +holly van buren 106.89999771118164 +tom nixon 191.92999649047852 +katie laertes 75.75 +mike brown 163.97000122070312 +oscar nixon 24.020000457763672 +zach garcia 101.61999893188477 +tom polk 76.98999786376953 +mike allen 96.44999694824219 +alice johnson 1.090000033378601 +holly robinson 26.209999084472656 +priscilla thompson 111.12999725341797 +yuri young 168.73999786376953 +rachel carson 80.98999786376953 +gabriella laertes 39.81999969482422 +victor brown 78.97999954223633 +bob carson 24.149999618530273 +holly allen 68.71999931335449 +fred nixon 38.04999923706055 +rachel carson 119.60000228881836 +alice nixon 49.130001068115234 +priscilla brown 123.57999801635742 +victor falkner 42.4900016784668 +david garcia 
67.27999877929688 +holly hernandez 116.36999893188477 +tom white 154.0 +rachel ellison 10.600000381469727 +PREHOOK: query: select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +gabriella xylophone 1276.850001335144 +calvin brown 1273.68000125885 +jessica laertes 1262.7900009155273 +yuri allen 1248.2500009536743 +tom johnson 1233.4700012207031 +bob ovid 1215.6200008392334 +fred nixon 1195.0100002288818 +oscar brown 1166.3199996948242 +calvin laertes 1137.1000003814697 +david falkner 1105.9300003051758 +calvin steinbeck 1067.5800018310547 +katie white 1028.9700012207031 +sarah falkner 989.4900016784668 +mike laertes 948.9500007629395 +victor ellison 907.3500022888184 +luke zipper 861.2700004577637 +rachel garcia 806.9099998474121 +wendy steinbeck 749.9700012207031 +priscilla zipper 685.0100021362305 +rachel thompson 611.4900054931641 +victor van buren 532.9100036621094 +fred zipper 451.5 +gabriella van buren 366.79000091552734 +nick carson 279.36000061035156 +katie king 188.0 +jessica polk 95.04000091552734 +oscar davidson 2368.430002987385 +xavier johnson 2367.600003004074 +rachel ovid 2365.6100029945374 +xavier davidson 2361.880002975464 +nick ellison 2353.0200033187866 +jessica robinson 2342.4000034332275 +bob king 2331.0800037384033 +ulysses xylophone 2318.2500038146973 +wendy thompson 2303.550004005432 +yuri brown 2288.590003967285 +ethan ovid 2271.010004043579 +rachel robinson 2251.9100036621094 +holly falkner 2230.9000034332275 +calvin nixon 2203.950002670288 +luke thompson 2176.7200031280518 +gabriella johnson 2147.6500034332275 +jessica brown 2117.940004348755 +quinn allen 2086.100004196167 +irene brown 2054.1600036621094 +katie zipper 2018.8400039672852 +gabriella steinbeck 1981.520004272461 +priscilla brown 1943.020004272461 +zach young 1900.9400024414062 +alice miller 1856.6400032043457 +priscilla zipper 1811.9800033569336 +rachel young 1765.1400032043457 +holly thompson 1716.2500038146973 +calvin white 1666.6100044250488 +priscilla hernandez 1616.330005645752 +fred polk 1564.240005493164 +sarah van buren 1510.9800071716309 +rachel ovid 1456.890007019043 +luke xylophone 1400.4400062561035 +yuri hernandez 1343.6800079345703 +oscar van buren 1282.2700080871582 +quinn ovid 1220.390007019043 +victor underhill 1157.360008239746 +luke king 1092.8100051879883 +calvin carson 1024.1900024414062 +jessica brown 948.0600051879883 +jessica nixon 869.0100021362305 +katie davidson 788.5800018310547 +fred king 707.1699981689453 +wendy johnson 624.3199996948242 +ulysses johnson 540.3399963378906 +katie xylophone 456.12999725341797 +ethan young 370.57999420166016 +gabriella underhill 282.6499938964844 +luke steinbeck 193.7199935913086 +bob falkner 99.44999694824219 +holly allen 1607.950005441904 +rachel ichabod 1607.590005427599 +bob carson 1607.1100054383278 +wendy miller 1606.3200054168701 +nick king 1605.0500054359436 +rachel ellison 1600.5700054168701 +yuri garcia 1591.5700054168701 +victor hernandez 1568.3000049591064 +wendy underhill 1543.1700057983398 +alice underhill 1517.830005645752 +rachel polk 1491.9200057983398 +holly nixon 1462.910005569458 +ethan nixon 1432.4400062561035 +sarah falkner 
1394.490005493164 +tom hernandez 1355.1900062561035 +rachel ichabod 1309.2800064086914 +priscilla thompson 1256.8400077819824 +jessica thompson 1202.7400093078613 +ulysses carson 1146.0400085449219 +wendy falkner 1087.2700080871582 +calvin white 1025.1800079345703 +jessica ovid 956.9800109863281 +jessica johnson 885.3000106811523 +priscilla garcia 805.8400115966797 +PREHOOK: query: select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +bob ovid 28.053333441416424 +calvin brown 38.73666652043661 +calvin laertes 51.493333180745445 +calvin steinbeck 46.826666514078774 +david falkner 42.81499973932902 +fred nixon 52.26333347956339 +fred zipper 62.97499990463257 +gabriella van buren 55.43666664759318 +gabriella xylophone 49.925000031789146 +jessica laertes 56.32999976476034 +jessica polk 69.13333320617676 +katie king 58.16333293914795 +katie white 54.92333253224691 +luke zipper 57.83333237965902 +mike laertes 61.86999924977621 +nick carson 61.69333299001058 +oscar brown 49.44166628519694 +priscilla zipper 52.25166670481364 +rachel garcia 53.56666787465414 +rachel thompson 54.903334617614746 +sarah falkner 44.27000093460083 +tom johnson 45.01600093841553 +victor ellison 51.80750107765198 +victor van buren 53.71666749318441 +wendy steinbeck 39.869999408721924 +yuri allen 14.779999732971191 +alice miller 51.76333204905192 +bob falkner 47.50333213806152 +bob king 45.58333269755045 +calvin carson 57.253332455952965 +calvin nixon 53.441665967305504 +calvin white 53.85499922434489 +ethan ovid 51.891666094462074 +ethan young 63.52999941507975 +fred king 53.36666615804037 +fred polk 47.83166631062826 +gabriella johnson 44.84166653951009 +gabriella steinbeck 45.1966667175293 +gabriella underhill 51.95500055948893 +holly falkner 50.538333892822266 +holly thompson 47.93333371480306 +irene brown 53.22833442687988 +jessica brown 61.600001653035484 +jessica brown 62.51333491007487 +jessica nixon 60.775001525878906 +jessica robinson 63.08166758219401 +katie davidson 66.04000091552734 +katie xylophone 61.931666692097984 +katie zipper 49.44333283106486 +luke king 43.36166621247927 +luke steinbeck 42.238332599401474 +luke thompson 33.54000013073286 +luke xylophone 37.376666873693466 +nick ellison 35.72333384553591 +oscar davidson 39.27666728695234 +oscar van buren 49.643333752950035 +priscilla brown 39.95166691144308 +priscilla hernandez 42.346666733423866 +priscilla zipper 37.166666746139526 +quinn allen 37.50833328564962 +quinn ovid 41.199999888738 +rachel ovid 44.729999939600624 +rachel ovid 46.558333237965904 +rachel robinson 47.90833361943563 +rachel young 58.40333414077759 +sarah van buren 52.74833424886068 +ulysses johnson 45.21000083287557 +ulysses xylophone 31.506667653719585 +victor underhill 31.98666767279307 +wendy johnson 31.46333380540212 +wendy thompson 24.84999978542328 +xavier davidson 26.82799973487854 +xavier johnson 31.319999754428864 +yuri brown 41.09666633605957 +yuri hernandez 52.85499954223633 +zach young 44.29999923706055 +alice underhill 38.0366666217645 +bob carson 38.7966665327549 +calvin white 51.90833304325739 +ethan ichabod 52.48833360274633 +ethan nixon 46.103333373864494 +holly allen 40.5249999165535 
+holly nixon 55.85333355267843 +jessica johnson 64.11166644096375 +jessica ovid 66.54166674613953 +jessica thompson 69.09166725476582 +nick king 68.65833353996277 +oscar carson 82.59166717529297 +priscilla garcia 80.75166702270508 +priscilla hernandez 68.91500091552734 +priscilla polk 53.32166742781798 +priscilla thompson 47.56499997278055 +quinn van buren 43.383333598574005 +rachel davidson 35.253333166241646 +rachel ellison 29.356666321555775 +rachel ichabod 37.651666397849716 +rachel ichabod 41.75999959309896 +rachel polk 49.56333351135254 +sarah falkner 59.53333377838135 +tom hernandez 63.331667264302574 +PREHOOK: query: select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 33.20166666666666 +alice davidson 30.741428571428568 +alice falkner 27.742499999999996 +alice king 26.706666666666663 +alice king 26.306999999999995 +alice xylophone 24.458181818181814 +bob ellison 25.029090909090908 +bob falkner 24.216363636363635 +bob ichabod 20.173636363636362 +bob johnson 16.431818181818176 +bob polk 16.640909090909087 +bob underhill 15.266363636363632 +bob underhill 18.288181818181812 +bob van buren 18.405454545454543 +calvin ichabod 20.90363636363636 +calvin white 22.448181818181812 +david carson 24.329090909090898 +david falkner 25.01181818181817 +david garcia 22.984545454545444 +david hernandez 22.92272727272726 +ethan steinbeck 24.026363636363627 +ethan underhill 25.189090909090904 +fred ellison 27.159999999999993 +gabriella brown 25.66454545454545 +holly nixon 25.70545454545454 +holly polk 24.11818181818182 +holly steinbeck 24.49090909090909 +holly thompson 23.376363636363635 +holly underhill 19.453636363636363 +irene ellison 20.378181818181826 +irene underhill 23.510000000000012 +irene young 25.371818181818195 +jessica johnson 24.42636363636365 +jessica king 26.380000000000017 +jessica miller 23.99545454545456 +jessica white 26.866363636363655 +katie ichabod 28.520909090909115 +luke garcia 26.110909090909114 +luke ichabod 27.41909090909093 +luke king 28.713636363636375 +luke young 30.59181818181818 +mike allen 27.91545454545455 +mike king 25.526363636363644 +mike polk 24.774545454545464 +mike white 25.18363636363637 +mike xylophone 27.50818181818182 +nick nixon 26.225454545454546 +nick robinson 24.34454545454545 +oscar davidson 26.719090909090916 +oscar garcia 27.196363636363643 +oscar johnson 27.08272727272728 +oscar johnson 25.164545454545472 +oscar miller 28.059090909090916 +priscilla laertes 31.73727272727274 +priscilla quirinius 30.353636363636372 +priscilla zipper 27.961818181818195 +quinn ellison 29.40636363636366 +quinn polk 27.267272727272754 +rachel davidson 25.415454545454562 +rachel thompson 23.608181818181823 +sarah miller 21.49909090909091 +sarah robinson 23.40454545454546 +sarah xylophone 26.957272727272724 +sarah zipper 24.83545454545455 +tom hernandez 21.274545454545454 +tom hernandez 20.315454545454546 +tom polk 21.90181818181819 +tom steinbeck 20.772727272727273 +ulysses carson 21.647272727272718 +ulysses ellison 22.960909090909084 +ulysses quirinius 23.025454545454544 +ulysses robinson 23.762727272727282 +ulysses steinbeck 21.08909090909091 +victor allen 
16.628181818181826 +victor hernandez 15.74909090909091 +victor robinson 18.193636363636355 +victor thompson 20.81181818181817 +victor xylophone 20.372727272727243 +wendy quirinius 20.81636363636362 +wendy robinson 19.936363636363634 +wendy xylophone 20.270909090909093 +xavier garcia 19.874000000000002 +xavier ovid 19.976666666666663 +yuri xylophone 21.89625000000001 +zach thompson 25.021428571428583 +zach young 27.77666666666668 +alice carson 18.785 +alice nixon 17.58142857142857 +alice underhill 17.072499999999998 +alice underhill 19.146666666666665 +alice xylophone 20.556 +bob falkner 19.116363636363637 +bob king 21.04 +bob ovid 20.854545454545452 +bob van buren 21.988181818181815 +bob xylophone 24.364545454545453 +calvin xylophone 26.91272727272727 +david falkner 27.31 +david laertes 28.00454545454545 +david miller 28.40090909090909 +PREHOOK: query: select s, sum(i) over(partition by ts order by s) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, sum(i) over(partition by ts order by s) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +bob ovid 65748 +calvin brown 131440 +calvin laertes 197097 +calvin steinbeck 262874 +david falkner 328506 +fred nixon 394118 +fred zipper 459719 +gabriella van buren 525334 +gabriella xylophone 591058 +jessica laertes 656771 +jessica polk 722558 +katie king 788310 +katie white 853920 +luke zipper 919543 +mike laertes 985277 +nick carson 1050928 +oscar brown 1116474 +priscilla zipper 1182084 +rachel garcia 1247836 +rachel thompson 1313378 +sarah falkner 1379093 +tom johnson 1444791 +victor ellison 1510421 +victor van buren 1576006 +wendy steinbeck 1641591 +yuri allen 1707256 +alice miller 65581 +bob falkner 131319 +bob king 197015 +calvin carson 262712 +calvin nixon 328407 +calvin white 393960 +ethan ovid 459504 +ethan young 525178 +fred king 590838 +fred polk 656600 +gabriella johnson 722283 +gabriella steinbeck 787886 +gabriella underhill 853497 +holly falkner 919218 +holly thompson 985000 +irene brown 1050757 +jessica brown 1182155 +jessica brown 1182155 +jessica nixon 1247815 +jessica robinson 1313437 +katie davidson 1379172 +katie xylophone 1444746 +katie zipper 1510302 +luke king 1576084 +luke steinbeck 1641724 +luke thompson 1707324 +luke xylophone 1773102 +nick ellison 1838744 +oscar davidson 1904390 +oscar van buren 1969971 +priscilla brown 2035582 +priscilla hernandez 2101353 +priscilla zipper 2166925 +quinn allen 2232487 +quinn ovid 2298060 +rachel ovid 2429366 +rachel ovid 2429366 +rachel robinson 2495140 +rachel young 2560880 +sarah van buren 2626599 +ulysses johnson 2692259 +ulysses xylophone 2757830 +victor underhill 2823401 +wendy johnson 2889058 +wendy thompson 2954831 +xavier davidson 3020367 +xavier johnson 3086050 +yuri brown 3151628 +yuri hernandez 3217338 +zach young 3283046 +alice underhill 65705 +bob carson 131461 +calvin white 197044 +ethan ichabod 262796 +ethan nixon 328501 +holly allen 394248 +holly nixon 459928 +jessica johnson 525664 +jessica ovid 591415 +jessica thompson 657122 +nick king 722691 +oscar carson 788459 +priscilla garcia 854222 +priscilla hernandez 919979 +priscilla polk 985680 +priscilla thompson 1051347 +quinn van buren 1117102 +rachel davidson 1182710 +rachel ellison 1248448 +rachel ichabod 1379923 +rachel ichabod 1379923 +rachel polk 1445518 +sarah falkner 1511234 +tom hernandez 1576947 +PREHOOK: query: select f, sum(f) over (partition by ts order by f 
range between unbounded preceding and current row) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +3.17 3.1700000762939453 +10.89 14.0600004196167 +14.54 28.600000381469727 +14.78 43.38000011444092 +17.85 61.230000495910645 +20.61 81.8400011062622 +28.69 110.53000164031982 +29.22 139.75000095367432 +31.17 170.92000102996826 +38.35 209.26999950408936 +38.61 247.88000011444092 +39.48 287.35999965667725 +40.54 327.9000005722046 +41.6 369.4999990463257 +46.08 415.58000087738037 +54.36 469.94000148773193 +56.94 526.8800001144409 +64.96 591.8399991989136 +73.52 665.35999584198 +78.58 743.9399976730347 +81.41 825.350001335144 +84.71 910.0600004196167 +87.43 997.4900007247925 +91.36 1088.850001335144 +92.96 1181.8100004196167 +95.04 1276.850001335144 +0.83 0.8299999833106995 +1.99 2.8199999928474426 +3.73 6.550000011920929 +8.86 15.409999668598175 +10.62 26.029999554157257 +11.32 37.349999248981476 +12.83 50.17999917268753 +14.7 64.87999898195267 +14.96 79.83999902009964 +17.58 97.4199989438057 +19.1 116.51999932527542 +21.01 137.52999955415726 +26.95 164.4800003170967 +27.23 191.70999985933304 +29.07 220.77999955415726 +29.71 250.4899986386299 +31.84 282.3299987912178 +31.94 314.2699993252754 +35.32 349.58999902009964 +37.32 386.90999871492386 +38.5 425.40999871492386 +42.08 467.49000054597855 +44.3 511.7899997830391 +44.66 556.4499996304512 +46.84 603.2899997830391 +48.89 652.1799991726875 +49.64 701.819998562336 +50.28 752.0999973416328 +52.09 804.1899974942207 +53.26 857.4499958157539 +54.09 911.5399959683418 +56.45 967.9899967312813 +56.76 1024.7499950528145 +61.41 1086.1599949002266 +61.88 1148.0399959683418 +63.03 1211.0699947476387 +64.55 1275.6199977993965 +68.62 1344.2400005459785 +76.13 1420.3699977993965 +79.05 1499.4200008511543 +80.43 1579.85000115633 +81.41 1661.2600048184395 +82.85 1744.1100032925606 +83.98 1828.0900066494942 +84.21 1912.3000057339668 +85.55 1997.8500087857246 +87.93 2085.7800090909004 +88.93 2174.710009396076 +94.27 2268.9800060391426 +99.45 2368.430002987385 +0.36 0.36000001430511475 +0.48 0.8400000035762787 +0.79 1.6300000250339508 +1.27 2.9000000059604645 +4.48 7.380000025033951 +9.0 16.38000002503395 +23.27 39.65000048279762 +25.13 64.77999964356422 +25.34 90.11999979615211 +25.91 116.02999964356422 +29.01 145.03999987244606 +30.47 175.50999918580055 +37.95 213.45999994874 +39.3 252.75999918580055 +45.91 298.66999903321266 +52.44 351.10999765992165 +54.1 405.20999613404274 +56.7 461.9099968969822 +58.77 520.6799973547459 +62.09 582.7699975073338 +68.2 650.9699944555759 +71.68 722.6499947607517 +79.46 802.1099938452244 +80.02 882.1299904882908 +PREHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +3.17 NULL +10.89 3.1700000762939453 +14.54 14.0600004196167 +14.78 25.43000030517578 +17.85 29.31999969482422 +20.61 32.63000011444092 +28.69 38.46000099182129 +29.22 
49.30000114440918 +31.17 57.90999984741211 +38.35 60.38999938964844 +38.61 69.51999855041504 +39.48 76.95999908447266 +40.54 78.09000015258789 +41.6 80.02000045776367 +46.08 82.13999938964844 +54.36 87.68000030517578 +56.94 100.44000244140625 +64.96 111.29999923706055 +73.52 121.89999771118164 +78.58 138.47999572753906 +81.41 152.0999984741211 +84.71 159.99000549316406 +87.43 166.12000274658203 +91.36 172.13999938964844 +92.96 178.79000091552734 +95.04 184.31999969482422 +0.83 NULL +1.99 0.8299999833106995 +3.73 2.8199999928474426 +8.86 5.7200000286102295 +10.62 12.589999675750732 +11.32 19.479999542236328 +12.83 21.9399995803833 +14.7 24.149999618530273 +14.96 27.52999973297119 +17.58 29.65999984741211 +19.1 32.53999996185303 +21.01 36.68000030517578 +26.95 40.11000061035156 +27.23 47.96000099182129 +29.07 54.18000030517578 +29.71 56.29999923706055 +31.84 58.779998779296875 +31.94 61.54999923706055 +35.32 63.78000068664551 +37.32 67.26000022888184 +38.5 72.63999938964844 +42.08 75.81999969482422 +44.3 80.58000183105469 +44.66 86.38000106811523 +46.84 88.95999908447266 +48.89 91.5 +49.64 95.72999954223633 +50.28 98.52999877929688 +52.09 99.91999816894531 +53.26 102.36999893188477 +54.09 105.3499984741211 +56.45 107.3499984741211 +56.76 110.54000091552734 +61.41 113.20999908447266 +61.88 118.16999816894531 +63.03 123.29000091552734 +64.55 124.90999984741211 +68.62 127.58000183105469 +76.13 133.17000579833984 +79.05 144.75 +80.43 155.18000030517578 +81.41 159.4800033569336 +82.85 161.84000396728516 +83.98 164.26000213623047 +84.21 166.8300018310547 +85.55 168.19000244140625 +87.93 169.76000213623047 +88.93 173.4800033569336 +94.27 176.86000061035156 +99.45 183.1999969482422 +0.36 NULL +0.48 0.36000001430511475 +0.79 0.8400000035762787 +1.27 1.270000010728836 +4.48 2.060000002384186 +9.0 5.75 +23.27 13.480000019073486 +25.13 32.27000045776367 +25.34 48.39999961853027 +25.91 50.46999931335449 +29.01 51.25 +30.47 54.920000076293945 +37.95 59.47999954223633 +39.3 68.42000007629395 +45.91 77.25 +52.44 85.20999908447266 +54.1 98.3499984741211 +56.7 106.53999710083008 +58.77 110.79999923706055 +62.09 115.47000122070312 +68.2 120.86000061035156 +71.68 130.28999710083008 +79.46 139.87999725341797 +80.02 151.13999938964844 +PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 65545 2.22 +alice allen 65557 2.58 +alice allen 65600 3.38 +alice allen 65609 2.99 +alice allen 65662 2.7 +alice allen 65670 2.88 +alice allen 65720 2.76 +PREHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 65545 20.0 +alice allen 65557 20.0 +alice allen 65600 20.0 +alice allen 65609 20.0 +alice allen 65662 20.0 +alice allen 65670 20.0 +alice allen 65720 20.0 +PREHOOK: query: select s, i from ( select s, i, round((avg(d) over w1 + 
10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+alice allen 65545
+alice allen 65557
+alice allen 65600
+alice allen 65609
+alice allen 65662
+alice allen 65670
+alice allen 65720
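For context on the frame names these golden files now print: when an OVER clause has an ORDER BY but no explicit frame, the SQL default is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, so peer rows (ties in the order key) share one aggregate value; an explicit ROWS frame advances per physical row. That is why, in the sum(i) over(partition by ts order by s) output above, rows with a duplicated s value carry identical running totals. A minimal sketch of the two framings (table t(p, k, v) is hypothetical, not part of this patch):

  -- Default frame with ORDER BY: RANGE UNBOUNDED PRECEDING ~ CURRENT ROW,
  -- so rows tied on k receive the same running sum.
  select k,
         sum(v) over (partition by p order by k) as range_sum,
         -- Explicit ROWS frame: every physical row extends the frame by one.
         sum(v) over (partition by p order by k rows unbounded preceding) as rows_sum
  from t;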
diff --git ql/src/test/results/clientpositive/outer_reference_windowed.q.out ql/src/test/results/clientpositive/outer_reference_windowed.q.out
index 1df6091..003a04c 100644
--- ql/src/test/results/clientpositive/outer_reference_windowed.q.out
+++ ql/src/test/results/clientpositive/outer_reference_windowed.q.out
@@ -189,7 +189,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumHiveDecimal
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: decimal(35,2))
@@ -303,7 +303,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumHiveDecimal
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: decimal(35,2))
@@ -473,7 +473,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumHiveDecimal
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: decimal(35,2))
@@ -647,7 +647,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumHiveDecimal
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: decimal(35,2))
@@ -803,7 +803,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double)
diff --git ql/src/test/results/clientpositive/pcs.q.out ql/src/test/results/clientpositive/pcs.q.out
index dc2a476..af5d11a 100644
--- ql/src/test/results/clientpositive/pcs.q.out
+++ ql/src/test/results/clientpositive/pcs.q.out
@@ -999,7 +999,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: lag
                          window function: GenericUDAFLagEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
diff --git ql/src/test/results/clientpositive/ppd_windowing1.q.out ql/src/test/results/clientpositive/ppd_windowing1.q.out
index ad57ba9..1adc8fc 100644
--- ql/src/test/results/clientpositive/ppd_windowing1.q.out
+++ ql/src/test/results/clientpositive/ppd_windowing1.q.out
@@ -44,7 +44,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -110,7 +110,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -176,7 +176,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -242,7 +242,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -308,7 +308,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (UDFToInteger(_col0) + 2) (type: int), sum_window_0 (type: double)
@@ -374,7 +374,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -440,7 +440,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -506,7 +506,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -572,7 +572,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -638,7 +638,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (UDFToInteger(_col0) + 2) (type: int), sum_window_0 (type: double)
@@ -705,7 +705,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string)
@@ -751,7 +751,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double)
@@ -818,7 +818,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string)
@@ -864,7 +864,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double)
@@ -931,7 +931,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string)
@@ -977,7 +977,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double)
@@ -1044,7 +1044,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string)
@@ -1090,7 +1090,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (UDFToInteger(_col1) + 2) (type: int), _col0 (type: double), sum_window_1 (type: double)
@@ -1157,7 +1157,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string), _col1 (type: string)
@@ -1203,7 +1203,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double)
@@ -1270,7 +1270,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string), _col1 (type: string)
@@ -1316,7 +1316,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double)
@@ -1383,7 +1383,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string), _col1 (type: string)
@@ -1429,7 +1429,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double)
@@ -1496,7 +1496,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string), _col1 (type: string)
@@ -1542,7 +1542,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: (UDFToInteger(_col1) + 2) (type: int), _col0 (type: double), sum_window_1 (type: double)
@@ -1606,7 +1606,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: double), _col0 (type: string), _col1 (type: string)
@@ -1652,7 +1652,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: avg
                          window function: GenericUDAFAverageEvaluatorDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col1 (type: string), _col0 (type: double), avg_window_1 (type: double)
@@ -1719,7 +1719,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -1786,7 +1786,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), sum_window_0 (type: double)
@@ -1852,7 +1852,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double)
@@ -1919,7 +1919,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double)
@@ -1986,7 +1986,7 @@ STAGE PLANS:
                          arguments: _col0
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double)
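Every ppd_windowing1.q.out frame above prints as ROWS PRECEDING(MAX)~FOLLOWING(MAX): these OVER clauses appear to partition without ordering (or use an explicit whole-partition frame), and with no ORDER BY the frame spans the entire partition, so every row in a partition sees the same aggregate. A sketch of the shape involved (table t(p, v) is hypothetical, not part of this patch):

  -- No ORDER BY: the frame is the whole partition, printed as
  -- ROWS PRECEDING(MAX)~FOLLOWING(MAX) in these plans.
  select p, v,
         sum(v) over (partition by p) as partition_total
  from t;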
diff --git ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
index 025ec75..8171f47 100644
--- ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
+++ ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out
@@ -243,7 +243,7 @@ STAGE PLANS:
                          alias: row_number_window_0
                          name: row_number
                          window function: GenericUDAFRowNumberEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
diff --git ql/src/test/results/clientpositive/quotedid_basic.q.out ql/src/test/results/clientpositive/quotedid_basic.q.out
index 8897ae9..83ed387 100644
--- ql/src/test/results/clientpositive/quotedid_basic.q.out
+++ ql/src/test/results/clientpositive/quotedid_basic.q.out
@@ -217,7 +217,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
@@ -319,7 +319,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
diff --git ql/src/test/results/clientpositive/semijoin2.q.out ql/src/test/results/clientpositive/semijoin2.q.out
index 757341a..d6a0b90 100644
--- ql/src/test/results/clientpositive/semijoin2.q.out
+++ ql/src/test/results/clientpositive/semijoin2.q.out
@@ -163,7 +163,7 @@ STAGE PLANS:
                          arguments: COALESCE((- 973),(- 684),515)
                          name: LEAD
                          window function: GenericUDAFLeadEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
@@ -210,7 +210,7 @@ STAGE PLANS:
                          arguments: COALESCE(62,(- 380),(- 435))
                          name: SUM
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~FOLLOWING(48)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(48)
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
                      expressions: COALESCE(498,_col0,524) (type: int), (_col99 + _col17) (type: int), floor(_col22) (type: bigint), COALESCE(SUM_window_1,704) (type: bigint)
diff --git ql/src/test/results/clientpositive/semijoin4.q.out ql/src/test/results/clientpositive/semijoin4.q.out
index d6117ed..1cf8c96 100644
--- ql/src/test/results/clientpositive/semijoin4.q.out
+++ ql/src/test/results/clientpositive/semijoin4.q.out
@@ -175,7 +175,7 @@ STAGE PLANS:
                          arguments: -973
                          name: LEAD
                          window function: GenericUDAFLeadEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
diff --git ql/src/test/results/clientpositive/semijoin5.q.out ql/src/test/results/clientpositive/semijoin5.q.out
index db4f551..7857e3b 100644
--- ql/src/test/results/clientpositive/semijoin5.q.out
+++ ql/src/test/results/clientpositive/semijoin5.q.out
@@ -172,7 +172,7 @@ STAGE PLANS:
                          arguments: -973
                          name: LEAD
                          window function: GenericUDAFLeadEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
@@ -219,7 +219,7 @@ STAGE PLANS:
                          arguments: 62
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~FOLLOWING(48)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(48)
                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                    Select Operator
                      expressions: COALESCE(498,_col0,524) (type: int), (_col8 + UDFToInteger(_col6)) (type: int), floor(_col4) (type: bigint), COALESCE(sum_window_1,704) (type: bigint)
diff --git ql/src/test/results/clientpositive/spark/groupby_resolution.q.out ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
index 6352c2b..64c1dca 100644
--- ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
+++ ql/src/test/results/clientpositive/spark/groupby_resolution.q.out
@@ -678,7 +678,7 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out
index 2e31fbd..82fc9f8 100644
--- ql/src/test/results/clientpositive/spark/ptf.q.out
+++ ql/src/test/results/clientpositive/spark/ptf.q.out
@@ -91,21 +91,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
@@ -293,7 +293,7 @@ STAGE PLANS:
                          arguments: _col5, 1, _col5
                          name: lag
                          window function: GenericUDAFLagEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -563,21 +563,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
@@ -738,21 +738,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: lag_window_2
                          arguments: _col5, 1, _col5
                          name: lag
                          window function: GenericUDAFLagEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -925,21 +925,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: lag_window_2
                          arguments: _col2, 1, _col2
                          name: lag
                          window function: GenericUDAFLagEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -1409,7 +1409,7 @@ STAGE PLANS:
                          arguments: _col1, _col5
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -1580,21 +1580,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
@@ -1751,21 +1751,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
@@ -1982,21 +1982,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
@@ -2163,13 +2163,13 @@ STAGE PLANS:
                          arguments: _col5
                          name: count
                          window function: GenericUDAFCountEvaluator
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                        window function definition
                          alias: sum_window_1
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(2)~FOLLOWING(2)
+                         window frame: ROWS PRECEDING(2)~FOLLOWING(2)
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double)
@@ -2373,33 +2373,33 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: count_window_2
                          arguments: _col1
                          name: count
                          window function: GenericUDAFCountEvaluator
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                        window function definition
                          alias: sum_window_3
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                        window function definition
                          alias: lag_window_4
                          arguments: _col5, 1, _col5
                          name: lag
                          window function: GenericUDAFLagEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                    Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
@@ -2729,7 +2729,7 @@ STAGE PLANS:
                          arguments: _col2
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(2)~CURRENT
+                         window frame: ROWS PRECEDING(2)~CURRENT
                    Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double)
@@ -2914,21 +2914,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col7
                          name: sum
                          window function: GenericUDAFSumDouble
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
@@ -2966,7 +2966,7 @@ STAGE PLANS:
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(5)~CURRENT
+                         window frame: RANGE PRECEDING(5)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: sum_window_0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
@@ -3002,28 +3002,28 @@ STAGE PLANS:
                          arguments: _col3, _col2
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_2
                          arguments: _col3, _col2
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: cume_dist_window_3
                          arguments: _col3, _col2
                          name: cume_dist
                          window function: GenericUDAFCumeDistEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: first_value_window_4
                          arguments: _col6, true
                          name: first_value
                          window function: GenericUDAFFirstValueEvaluator
-                         window frame: PRECEDING(2)~FOLLOWING(2)
+                         window frame: ROWS PRECEDING(2)~FOLLOWING(2)
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int)
@@ -3402,21 +3402,21 @@ STAGE PLANS:
                          arguments: _col2, _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col2, _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
@@ -3666,21 +3666,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
@@ -3906,21 +3906,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
@@ -4183,21 +4183,21 @@ STAGE PLANS:
                          arguments: _col2, _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col2, _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
@@ -4442,21 +4442,21 @@ STAGE PLANS:
                          arguments: _col2, _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col2, _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: ROWS PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
@@ -4688,21 +4688,21 @@ STAGE PLANS:
                          arguments: _col1
                          name: rank
                          window function: GenericUDAFRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: dense_rank_window_1
                          arguments: _col1
                          name: dense_rank
                          window function: GenericUDAFDenseRankEvaluator
-                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                         window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                          isPivotResult: true
                        window function definition
                          alias: sum_window_2
                          arguments: _col5
                          name: sum
                          window function: GenericUDAFSumLong
-                         window frame: PRECEDING(MAX)~CURRENT
+                         window frame: RANGE PRECEDING(MAX)~CURRENT
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -293,7 +293,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -607,7 +607,7 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -778,21 +778,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1009,21 +1009,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1242,21 +1242,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS 
PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1475,21 +1475,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1687,33 +1687,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1960,21 +1960,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -2224,21 +2224,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -2472,21 +2472,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out index fa184d8..07cb810 100644 --- ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -327,7 +327,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -493,7 +493,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index 868cda5..6c58639 100644 --- ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -435,7 +435,7 @@ STAGE PLANS: arguments: _col1 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), avg_window_0 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_7.q.out ql/src/test/results/clientpositive/spark/vectorization_7.q.out index 9783907..925070a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_7.q.out +++ 
ql/src/test/results/clientpositive/spark/vectorization_7.q.out @@ -97,8 +97,8 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized @@ -331,8 +331,8 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_8.q.out ql/src/test/results/clientpositive/spark/vectorization_8.q.out index b5c056f..cf3bb82 100644 --- ql/src/test/results/clientpositive/spark/vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_8.q.out @@ -93,8 +93,8 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized @@ -314,8 +314,8 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/windowing.q.out ql/src/test/results/clientpositive/spark/windowing.q.out index d4be0b3..6bd9ce9 100644 --- ql/src/test/results/clientpositive/spark/windowing.q.out +++ ql/src/test/results/clientpositive/spark/windowing.q.out @@ -1864,7 +1864,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index 2b3af79..db532e8 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -1803,7 +1803,7 @@ STAGE PLANS: arguments: _col1 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: first_value_window_0 is not null (type: boolean) diff --git ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 0a7a36f..71a52a9 100644 --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -316,7 +316,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -482,7 +482,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git ql/src/test/results/clientpositive/union_remove_6_subq.q.out ql/src/test/results/clientpositive/union_remove_6_subq.q.out index 0bd00c9..dbe2bd9 100644 --- ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -525,7 +525,7 @@ STAGE PLANS: arguments: _col1 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), avg_window_0 (type: double) diff --git ql/src/test/results/clientpositive/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/vector_ptf_part_simple.q.out new file mode 100644 index 0000000..8f968f3 --- /dev/null +++ 
ql/src/test/results/clientpositive/vector_ptf_part_simple.q.out @@ -0,0 +1,2895 @@ +PREHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text +POSTHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_mfgr SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_name SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_retailprice SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_retailprice, type:double, comment:null), ] +vector_ptf_part_simple_text.p_mfgr vector_ptf_part_simple_text.p_name vector_ptf_part_simple_text.p_retailprice +PREHOOK: query: select * from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +vector_ptf_part_simple_orc.p_mfgr vector_ptf_part_simple_orc.p_name vector_ptf_part_simple_orc.p_retailprice +Manufacturer#2 almond aquamarine rose maroon antique 900.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 
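
The ROWS/RANGE prefixes in the window frame lines throughout these plans reflect the SQL defaults that the updated EXPLAIN output now prints explicitly: an aggregate whose OVER clause has no ORDER BY evaluates over the whole partition (shown as ROWS PRECEDING(MAX)~FOLLOWING(MAX)), while an aggregate with an ORDER BY but no explicit frame gets the standard default of RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW (shown as RANGE PRECEDING(MAX)~CURRENT). A minimal sketch against the vector_ptf_part_simple_orc table created above; these queries are illustrative only and are not part of the test file:

  -- Whole-partition default (no ORDER BY): ROWS PRECEDING(MAX)~FOLLOWING(MAX)
  select p_mfgr,
         count(p_retailprice) over (partition by p_mfgr) as c
  from vector_ptf_part_simple_orc;

  -- Standard default with ORDER BY: RANGE PRECEDING(MAX)~CURRENT
  select p_mfgr,
         count(p_retailprice) over (partition by p_mfgr order by p_name) as c
  from vector_ptf_part_simple_orc;

  -- Explicit bounded frame, printed with its bounds: ROWS PRECEDING(2)~FOLLOWING(2)
  select p_mfgr,
         sum(p_retailprice) over (partition by p_mfgr order by p_name
                                  rows between 2 preceding and 2 following) as s
  from vector_ptf_part_simple_orc;

Ranking functions such as rank() and dense_rank() are the exception: they are marked isPivotResult and keep the whole-partition ROWS PRECEDING(MAX)~FOLLOWING(MAX) frame even under an ORDER BY, as the plans below show.
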
+Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#5 almond antique blue firebrick mint 1789.69 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique forest lavender goldenrod NULL +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 +Manufacturer#4 almond antique violet mint lemon 1375.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#5 almond antique sky peru orange 1788.73 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique chartreuse khaki white 99.68 +Manufacturer#4 almond antique gainsboro frosted violet NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique olive coral navajo 1337.29 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 +Manufacturer#3 almond antique misty red olive 1922.98 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 +Manufacturer#4 almond aquamarine floral ivory bisque NULL +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 +Manufacturer#3 almond antique metallic orange dim 55.39 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + 
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col0 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_6 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isStar: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), 
row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c, +count(*) over(partition by p_mfgr) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 2 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 4 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 6 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique burnished rose metallic 1173.15 7 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 8 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 9 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 10 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 11 1 1 1753.76 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 1 1 1753.76 1632.66 11 12 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 2 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 3 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond antique violet 
chocolate turquoise 1690.68 6 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7 1 1 900.66 1698.66 8 8 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 8 1 1 900.66 1698.66 8 8 +Manufacturer#3 almond antique olive coral navajo 1337.29 1 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique metallic orange dim 55.39 4 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique misty red olive 1922.98 5 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 7 1 1 1337.29 99.68 7 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 8 1 1 1337.29 99.68 7 8 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 2 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique gainsboro frosted violet NULL 3 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 4 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond antique violet mint lemon 1375.42 5 1 1 1290.35 1206.26 4 6 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 6 1 1 1290.35 1206.26 4 6 +Manufacturer#5 almond antique sky peru orange 1788.73 1 1 1 1788.73 1018.1 6 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 2 1 1 1788.73 1018.1 6 6 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 3 1 1 1788.73 1018.1 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 4 1 1 1788.73 1018.1 6 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 5 1 1 1788.73 1018.1 6 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 6 1 1 1788.73 1018.1 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan 
Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_6 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + isStar: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), 
rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c, +count(*) over(partition by p_mfgr order by p_name) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 12 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique 
violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 5 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 7 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr 
order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: RANGE PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: 
first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_6 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + isStar: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint), count_window_6 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as rn, +rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as r, +dense_rank() over(partition by p_mfgr order by p_name range between unbounded preceding and unbounded following) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as c, +count(*) over(partition by p_mfgr order by 
p_name range between unbounded preceding and current row) as cs +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c cs +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 12 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 12 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 4 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 5 2 2 99.68 NULL 4 5 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 7 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 
4 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 6 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: 
GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 12724.68 900.66 2031.98 
1590.585 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique metallic orange dim 55.39 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique misty red olive 1922.98 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique chartreuse khaki white 99.68 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond antique violet mint lemon 1375.42 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique sky peru orange 1788.73 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique blue firebrick mint 1789.69 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by 
p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator 
Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name range between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 
almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name rows between unbounded preceding and current row) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 1173.15 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4100.06 1173.15 1753.76 1366.6866666666667 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5853.820000000001 1173.15 1753.76 1463.4550000000002 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 7607.580000000001 1173.15 1753.76 1521.516 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 14011.01 1173.15 1753.76 1556.778888888889 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 14011.01 1173.15 1753.76 1556.778888888889 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 15643.67 1173.15 1753.76 1564.367 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3491.38 1690.68 1800.7 1745.69 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5292.08 1690.68 1800.7 1764.0266666666666 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.78 1690.68 1800.7 1773.195 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 10025.42 900.66 2031.98 1670.9033333333334 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 
2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1289.95 99.68 1190.27 644.975 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1880.22 99.68 1190.27 626.74 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 3401.3500000000004 1611.66 1789.69 1700.6750000000002 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_retailprice SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +vector_ptf_part_simple_text_decimal.p_mfgr vector_ptf_part_simple_text_decimal.p_name vector_ptf_part_simple_text_decimal.p_retailprice +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av 
+Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond aquamarine rose maroon antique 900.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique olive 
coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) 
as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: decimal(38,18)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: 
RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 10963.930000000000000000 1173.150000000000000000 1753.760000000000000000 1566.275714285714285714 +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 12378.350000000000000000 1173.150000000000000000 1753.760000000000000000 
1547.293750000000000000 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 9124.760000000000000000 1690.680000000000000000 2031.980000000000000000 1824.952000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 900.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 3125.880000000000000000 55.390000000000000000 1190.270000000000000000 625.176000000000000000 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 5048.860000000000000000 55.390000000000000000 1922.980000000000000000 841.476666666666666667 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 
almond antique violet mint lemon 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 4426.600000000000000000 1206.260000000000000000 1844.920000000000000000 1475.533333333333333333 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 6801.740000000000000000 1611.660000000000000000 1789.690000000000000000 1700.435000000000000000 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 7819.840000000000000000 1018.100000000000000000 1789.690000000000000000 1563.968000000000000000 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_bigint EXPRESSION [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, 
type:string, comment:null), ] +p_mfgr p_name _c2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_bigint (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: 
GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine burnished black steel 141442 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 
90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet chocolate turquoise 169068 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 59027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod NULL 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique metallic orange dim 5539 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique misty red olive 192298 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique chartreuse khaki white 9968 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 571695 120626 184492 142923.75 +Manufacturer#4 almond antique gainsboro frosted violet NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond antique violet mint lemon 137542 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 571695 120626 184492 142923.75 +Manufacturer#5 almond antique sky peru orange 178873 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique blue firebrick mint 178969 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 928432 101810 178969 154738.66666666666 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_bigint (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, 
+sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1096393 117315 175376 156627.57142857142 +Manufacturer#1 almond aquamarine burnished black steel 141442 1237835 117315 175376 154729.375 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond antique violet chocolate turquoise 169068 169068 169068 169068 169068.0 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 912476 169068 203198 182495.2 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique chartreuse khaki white 9968 9968 9968 9968 9968.0 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 59027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod NULL 307049 9968 119027 76762.25 +Manufacturer#3 almond antique metallic orange dim 5539 312588 5539 119027 62517.6 +Manufacturer#3 almond antique misty red olive 192298 504886 5539 192298 84147.66666666667 +Manufacturer#3 almond antique olive coral navajo 
133729 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 137542 137542 137542 137542 137542.0 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 442660 120626 184492 147553.33333333334 +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#5 almond antique blue firebrick mint 178969 178969 178969 178969 178969.0 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique sky peru orange 178873 680174 161166 178969 170043.5 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 781984 101810 178969 156396.8 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input 
alias: ptf_0 + output shape: _col0: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1753.76 1 +Manufacturer#1 NULL 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1632.66 1 +Manufacturer#1 1414.42 1 +Manufacturer#1 1753.76 1 +Manufacturer#1 1602.59 1 +Manufacturer#1 1632.66 1 +Manufacturer#2 900.66 1 +Manufacturer#2 2031.98 1 +Manufacturer#2 1000.6 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 1 +Manufacturer#2 1698.66 1 +Manufacturer#3 1337.29 1 +Manufacturer#3 590.27 1 +Manufacturer#3 NULL 1 +Manufacturer#3 55.39 1 +Manufacturer#3 1922.98 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 1190.27 1 +Manufacturer#3 99.68 1 +Manufacturer#4 1290.35 1 +Manufacturer#4 1844.92 1 +Manufacturer#4 NULL 1 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 1 +Manufacturer#4 1206.26 1 +Manufacturer#5 1788.73 1 +Manufacturer#5 1789.69 1 +Manufacturer#5 1464.48 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1611.66 1 +Manufacturer#5 1018.1 1 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output 
Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_retailprice, +rank() over(partition by p_mfgr order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_retailprice r +Manufacturer#1 1173.15 1 +Manufacturer#1 1173.15 1 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1753.76 3 +Manufacturer#1 1602.59 7 +Manufacturer#1 
1414.42 8 +Manufacturer#1 1632.66 9 +Manufacturer#1 NULL 9 +Manufacturer#1 1632.66 9 +Manufacturer#1 1632.66 9 +Manufacturer#2 1690.68 1 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 1800.7 2 +Manufacturer#2 2031.98 5 +Manufacturer#2 900.66 6 +Manufacturer#2 1698.66 6 +Manufacturer#2 1000.6 8 +Manufacturer#3 99.68 1 +Manufacturer#3 1190.27 2 +Manufacturer#3 590.27 2 +Manufacturer#3 1190.27 2 +Manufacturer#3 NULL 2 +Manufacturer#3 55.39 6 +Manufacturer#3 1922.98 7 +Manufacturer#3 1337.29 8 +Manufacturer#4 NULL 1 +Manufacturer#4 1375.42 2 +Manufacturer#4 NULL 3 +Manufacturer#4 1206.26 3 +Manufacturer#4 1844.92 5 +Manufacturer#4 1290.35 6 +Manufacturer#5 1789.69 1 +Manufacturer#5 1611.66 2 +Manufacturer#5 1611.66 2 +Manufacturer#5 1788.73 4 +Manufacturer#5 1018.1 5 +Manufacturer#5 1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, timestamp, timestamp + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 1 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 1 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 1 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 1 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique 
violet turquoise frosted 1800.7 1 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 1 +Manufacturer#3 almond antique olive coral navajo 1337.29 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 1 +Manufacturer#3 almond antique forest lavender goldenrod NULL 1 +Manufacturer#3 almond antique metallic orange dim 55.39 1 +Manufacturer#3 almond antique misty red olive 1922.98 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 1 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 1 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 1 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 1 +Manufacturer#5 almond antique sky peru orange 1788.73 1 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 1 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp), p_name (type: string) + sort order: +++ + Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, timestamp, timestamp + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0, CASE WHEN ((_col0 = 'Manufacturer#2')) THEN (2000-01-01 00:00:00.0) ELSE (null) END + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_retailprice, +rank() over(partition by p_mfgr, case when p_mfgr == "Manufacturer#2" then timestamp "2000-01-01 00:00:00" end order by p_name) as r +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice r +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 
+Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 6 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 +Manufacturer#3 almond antique forest lavender goldenrod NULL 2 +Manufacturer#3 almond antique metallic orange dim 55.39 6 +Manufacturer#3 almond antique misty red olive 1922.98 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 +Manufacturer#5 almond antique sky peru orange 1788.73 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 diff --git ql/src/test/results/clientpositive/vector_windowing.q.out ql/src/test/results/clientpositive/vector_windowing.q.out new file mode 100644 index 0000000..f1871cf --- /dev/null +++ ql/src/test/results/clientpositive/vector_windowing.q.out @@ -0,0 +1,9125 @@ +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was 
here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr s1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), 
p_retailprice (type: double) + outputColumnNames: p_name, p_mfgr, p_size, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 5, 7] + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 1, col 5 + native: false + projectedOutputColumns: [0] + keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: lag_window_2 + arguments: _col2, 1, _col2 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + 
isPivotResult: true + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size _c3 r dr p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 
4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 5, val 0) -> boolean + predicate: (p_size > 0) (type: boolean) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 1, col 5 + native: false + projectedOutputColumns: [0] + keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce 
Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: lag_window_2 + arguments: _col2, 1, _col2 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size _c3 r dr p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 
+Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: count_window_0 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name cd +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique chartreuse lavender yellow 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 +Manufacturer#1 almond aquamarine burnished black steel 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 +Manufacturer#2 almond antique violet chocolate turquoise 1 +Manufacturer#2 almond antique violet turquoise frosted 2 +Manufacturer#2 almond aquamarine midnight light salmon 3 +Manufacturer#2 almond aquamarine rose maroon antique 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 +Manufacturer#3 almond antique chartreuse khaki white 1 +Manufacturer#3 almond antique forest lavender goldenrod 2 +Manufacturer#3 almond antique metallic orange dim 3 +Manufacturer#3 almond antique misty red olive 4 +Manufacturer#3 almond antique olive coral navajo 5 +Manufacturer#4 almond antique gainsboro frosted violet 1 +Manufacturer#4 almond antique violet mint lemon 2 +Manufacturer#4 almond aquamarine floral ivory bisque 3 +Manufacturer#4 almond aquamarine yellow dodger mint 4 +Manufacturer#4 almond azure aquamarine papaya violet 5 +Manufacturer#5 almond antique blue firebrick mint 1 +Manufacturer#5 almond antique medium spring khaki 2 +Manufacturer#5 almond antique sky peru orange 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 +Manufacturer#5 almond azure blanched chiffon midnight 5 +PREHOOK: query: explain 
vectorization detail +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + 
window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: count_window_2 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_3 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: lag_window_4 + arguments: _col5, 1, _col5 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name r dr cd p_retailprice s1 p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 2346.3 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 3 1753.76 4100.06 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 4 1602.59 5702.65 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 5 4 5 1414.42 7117.07 28 22 +Manufacturer#1 almond 
aquamarine pink moccasin thistle 6 5 6 1632.66 8749.73 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.36 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.62 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 +PREHOOK: query: explain vectorization detail +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: count_window_2 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_3 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: lag_window_4 + arguments: _col5, 1, _col5 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), round(sum_window_3, 2) (type: double), (_col5 - lag_window_4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +sub1.r sub1.dr sub1.cd sub1.s1 sub1.deltasz +1 1 1 1620.67 0 +1 1 1 1671.68 0 +1 1 1 1690.68 0 +1 1 1 1789.69 0 +1 1 2 1173.15 0 +1 1 2 2346.3 0 +2 2 2 2861.95 -3 +2 2 2 2996.09 29 +2 2 2 3401.35 -25 +2 2 2 3491.38 26 +3 2 3 4100.06 32 +3 3 3 4202.35 -12 +3 3 3 4272.34 5 +3 3 3 5190.08 -4 +3 3 3 5523.36 -38 +4 3 4 5702.65 -28 +4 4 4 6047.27 -20 +4 4 4 6195.32 -18 +4 4 4 6208.18 44 +4 4 4 7222.02 23 +5 4 5 7117.07 22 +5 5 5 7337.62 5 +5 5 5 7532.61 44 +5 5 5 7672.66 -23 +5 5 5 8923.62 -7 +6 5 6 8749.73 14 +PREHOOK: query: explain vectorization detail +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: part + output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double + type: TABLE + Partition table definition + input alias: abc + name: noop + order by: _col1 ASC NULLS FIRST + output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double + partition by: _col2 + raw input shape: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce 
partition columns: p_partkey (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 p_partkey (type: int) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: _col1:string, _col2:string, _col5:int, _col7:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: 
sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: lag_window_3 + arguments: _col5, 1, _col5 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col7 (type: double), round(sum_window_2, 2) (type: double), _col5 (type: int), (_col5 - lag_window_3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +abc.p_mfgr abc.p_name r dr abc.p_retailprice s1 abc.p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 2346.3 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 3519.45 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 4692.6 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 5 2 1753.76 6446.36 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 1602.59 8048.95 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 7 4 1414.42 9463.37 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 1632.66 11096.03 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2031.98 5523.36 2 -38 +Manufacturer#2 almond 
aquamarine rose maroon antique 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 1290.35 7337.62 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.35 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 1464.48 7672.66 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) + sort order: ++- + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1, _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 +PREHOOK: query: 
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_2
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
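The plan prints the running-total frame rows between unbounded preceding and current row as ROWS PRECEDING(MAX)~CURRENT. A minimal sketch of the same computation, with the arithmetic grounded in the result set that follows:

-- Running total per partition: the frame starts at the first partition row
-- and ends at the current row, so each s1 is the previous s1 plus the current
-- p_retailprice (e.g. 1173.15 + 1173.15 = 2346.3 for Manufacturer#1 below).
select p_mfgr, p_name,
       round(sum(p_retailprice) over (partition by p_mfgr order by p_name
             rows between unbounded preceding and current row), 2) as s1
from part;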
+PREHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r dr s1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
+Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
+Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
+Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
+Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
+Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
+Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_2
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r dr s1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
+Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
+Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
+Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
+Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
+Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
+Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: ROWS CURRENT~CURRENT
+ window function definition
+ alias: first_value_window_1
+ arguments: _col5
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: last_value_window_2
+ arguments: _col5, false
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), first_value_window_1 (type: int), last_value_window_2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s2 f l
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 34
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 6
+Manufacturer#1 almond antique chartreuse lavender yellow 34 34 2 28
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 2 42
+Manufacturer#1 almond aquamarine burnished black steel 28 28 34 42
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 42
+Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2
+Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25
+Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18
+Manufacturer#2 almond aquamarine rose maroon antique 25 25 40 18
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18
+Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19
+Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1
+Manufacturer#3 almond antique metallic orange dim 19 19 17 45
+Manufacturer#3 almond antique misty red olive 1 1 14 45
+Manufacturer#3 almond antique olive coral navajo 45 45 19 45
+Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 27
+Manufacturer#4 almond antique violet mint lemon 39 39 10 7
+Manufacturer#4 almond aquamarine floral ivory bisque 27 27 10 12
+Manufacturer#4 almond aquamarine yellow dodger mint 7 7 39 12
+Manufacturer#4 almond azure aquamarine papaya violet 12 12 27 12
+Manufacturer#5 almond antique blue firebrick mint 31 31 31 2
+Manufacturer#5 almond antique medium spring khaki 6 6 31 46
+Manufacturer#5 almond antique sky peru orange 2 2 31 23
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 6 23
+Manufacturer#5 almond azure blanched chiffon midnight 23 23 2 23
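A sketch of the frame semantics exercised above, assuming the standard OVER spelling; the boolean second argument of last_value is, to my understanding, Hive's skip-nulls flag (false here, so nulls would be kept):

-- w1 spans two rows before through two rows after the current row, clamped
-- at partition edges: on the first Manufacturer#1 row, f sees the current
-- row (2) and l sees the value two rows ahead (34), matching the output.
select p_mfgr, p_name, p_size,
       first_value(p_size) over w1 as f,
       last_value(p_size, false) over w1 as l
from part
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);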
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+where p_mfgr = 'Manufacturer#3'
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+where p_mfgr = 'Manufacturer#3'
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val Manufacturer#3) -> boolean
+ predicate: (p_mfgr = 'Manufacturer#3') (type: boolean)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 'Manufacturer#3' (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: 'Manufacturer#3' (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: string, string
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), VALUE._col4 (type: int)
+ outputColumnNames: _col1, _col5
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: 'Manufacturer#3'
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_1
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: ROWS CURRENT~CURRENT
+ window function definition
+ alias: first_value_window_2
+ arguments: _col5
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: last_value_window_3
+ arguments: _col5, false
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'Manufacturer#3' (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: bigint), first_value_window_2 (type: int), last_value_window_3 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+where p_mfgr = 'Manufacturer#3'
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2,
+first_value(p_size) over w1 as f,
+last_value(p_size, false) over w1 as l
+from part
+where p_mfgr = 'Manufacturer#3'
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r s2 f l
+Manufacturer#3 almond antique chartreuse khaki white 17 1 17 17 19
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 14 17 1
+Manufacturer#3 almond antique metallic orange dim 19 3 19 17 45
+Manufacturer#3 almond antique misty red olive 1 4 1 14 45
+Manufacturer#3 almond antique olive coral navajo 45 5 45 19 45
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ window function definition
+ alias: sum_window_1
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: ROWS CURRENT~CURRENT
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+sum(p_size) over w1 as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s1 s2
+Manufacturer#1 almond antique burnished rose metallic 2 38 2
+Manufacturer#1 almond antique burnished rose metallic 2 44 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 72 34
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 6
+Manufacturer#1 almond aquamarine burnished black steel 28 110 28
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42
+Manufacturer#2 almond antique violet chocolate turquoise 14 56 14
+Manufacturer#2 almond antique violet turquoise frosted 40 81 40
+Manufacturer#2 almond aquamarine midnight light salmon 2 99 2
+Manufacturer#2 almond aquamarine rose maroon antique 25 85 25
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 18
+Manufacturer#3 almond antique chartreuse khaki white 17 50 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 51 14
+Manufacturer#3 almond antique metallic orange dim 19 96 19
+Manufacturer#3 almond antique misty red olive 1 79 1
+Manufacturer#3 almond antique olive coral navajo 45 65 45
+Manufacturer#4 almond antique gainsboro frosted violet 10 76 10
+Manufacturer#4 almond antique violet mint lemon 39 83 39
+Manufacturer#4 almond aquamarine floral ivory bisque 27 95 27
+Manufacturer#4 almond aquamarine yellow dodger mint 7 85 7
+Manufacturer#4 almond azure aquamarine papaya violet 12 46 12
+Manufacturer#5 almond antique blue firebrick mint 31 39 31
+Manufacturer#5 almond antique medium spring khaki 6 85 6
+Manufacturer#5 almond antique sky peru orange 2 108 2
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 46
+Manufacturer#5 almond azure blanched chiffon midnight 23 71 23
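Both sums above share one partition/order spec, so the plan evaluates them in a single PTF Operator pass (sum_window_0 and sum_window_1). A sketch with the arithmetic worked out from the rows above:

-- Same partitioning and ordering, different frames, one pass: for the third
-- Manufacturer#1 row (p_size 34) the w1 frame covers 2 + 2 + 34 + 6 + 28 = 72,
-- while the current-row-only frame yields 34, matching s1/s2 above.
select p_mfgr, p_name, p_size,
       sum(p_size) over w1 as s1,
       sum(p_size) over (partition by p_mfgr order by p_name
                         rows between current row and current row) as s2
from part
window w1 as (partition by p_mfgr order by p_name
              rows between 2 preceding and 2 following);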
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r dr
+Manufacturer#1 almond antique burnished rose metallic 2 1 1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2
+Manufacturer#3 almond antique metallic orange dim 19 3 3
+Manufacturer#3 almond antique misty red olive 1 4 4
+Manufacturer#3 almond antique olive coral navajo 45 5 5
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1
+Manufacturer#4 almond antique violet mint lemon 39 2 2
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5
+Manufacturer#5 almond antique blue firebrick mint 31 1 1
+Manufacturer#5 almond antique medium spring khaki 6 2 2
+Manufacturer#5 almond antique sky peru orange 2 3 3
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5
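The pair of ranking functions above differ only in gap handling, and the unused w1 window definition is legal and simply absent from the plan. A minimal sketch, grounded in the tied Manufacturer#1 rows:

-- rank() counts tied rows (1, 1, 3 for Manufacturer#1), while dense_rank()
-- leaves no gap (1, 1, 2), exactly as in the result set above.
select p_mfgr, p_name,
       rank() over (partition by p_mfgr order by p_name) as r,
       dense_rank() over (partition by p_mfgr order by p_name) as dr
from part;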
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: cume_dist_window_2
+ arguments: _col1
+ name: cume_dist
+ window function: GenericUDAFCumeDistEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: percent_rank_window_3
+ arguments: _col1
+ name: percent_rank
+ window function: GenericUDAFPercentRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: ntile_window_4
+ arguments: 3
+ name: ntile
+ window function: GenericUDAFNTileEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: count_window_5
+ arguments: _col5
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: avg_window_6
+ arguments: _col5
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDouble
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: stddev_window_7
+ arguments: _col5
+ name: stddev
+ window function: GenericUDAFStdEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: first_value_window_8
+ arguments: (_col5 % 5)
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: last_value_window_9
+ arguments: _col5
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int)
+ outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, percent_rank_window_3, ntile_window_4, count_window_5, avg_window_6, stddev_window_7, first_value_window_8, last_value_window_9, _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col5 (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 13
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ dataColumns: rank_window_0:int, dense_rank_window_1:int, cume_dist_window_2:double, percent_rank_window_3:double, ntile_window_4:int, count_window_5:bigint, avg_window_6:double, stddev_window_7:double, first_value_window_8:int, last_value_window_9:int, _col1:string, _col2:string, _col5:int
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: int), VALUE._col9 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col13 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11, _col12, _col15
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int, _col2: double, _col3: double, _col4: int, _col5: bigint, _col6: double, _col7: double, _col8: int, _col9: int, _col11: string, _col12: string, _col15: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col12 ASC NULLS FIRST, _col11 ASC NULLS FIRST
+ partition by: _col12
+ raw input shape:
+ window functions:
+ window function definition
+ alias: first_value_window_10
+ arguments: _col15
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: ROWS PRECEDING(2)~FOLLOWING(2)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col12 (type: string), _col11 (type: string), _col15 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: int), _col9 (type: int), first_value_window_10 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
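The ten p_name-ordered functions share one window spec, while fvW1 orders by (p_mfgr, p_name), so the planner chains two PTF invocations, each behind its own shuffle (Stage-1 and Stage-2 above). A rough, conceptual decomposition of that plan shape as nested queries (not literally what the optimizer emits):

-- Inner query: the shared (p_mfgr / p_name) spec; outer query: the second
-- shuffle and the w1 frame for fvW1.
select t.*,
       first_value(p_size) over (partition by p_mfgr
                                 order by p_mfgr, p_name
                                 rows between 2 preceding and 2 following) as fvW1
from (select p_mfgr, p_name, p_size,
             rank() over (partition by p_mfgr order by p_name) as r,
             dense_rank() over (partition by p_mfgr order by p_name) as dr
      from part) t;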
+PREHOOK: query: select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
+ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
+stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
+first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
+last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
+first_value(p_size) over w1 as fvW1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r dr cud pr nt ca avg st fv lv fvw1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 0.4 2 3 12.666666666666666 15.084944665313014 2 34 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 0.6 2 4 11.0 13.379088160259652 2 6 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 0.8 3 5 14.4 13.763720427268202 2 28 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 1.0 3 6 19.0 16.237815945091466 2 42 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 0.0 1 1 14.0 0.0 4 14 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 0.25 1 2 27.0 13.0 4 40 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 0.5 2 3 18.666666666666668 15.86050300449376 4 2 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 0.75 2 4 20.25 14.00669482783144 4 25 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 1.0 3 5 19.8 12.560254774486067 4 18 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 0.0 1 1 17.0 0.0 2 17 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 0.25 1 2 15.5 1.5 2 14 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 0.5 2 3 16.666666666666668 2.0548046676563256 2 19 17
+Manufacturer#3 almond antique misty red olive 1 4 4 0.8 0.75 2 4 12.75 7.013380069552769 2 1 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 1.0 3 5 19.2 14.344336861632886 2 45 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 0.0 1 1 10.0 0.0 0 10 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 0.25 1 2 24.5 14.5 0 39 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 0.5 2 3 25.333333333333332 11.897712198383164 0 27 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 0.75 2 4 20.75 13.007209539328564 0 7 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 1.0 3 5 19.0 12.149074038789951 0 12 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 0.0 1 1 31.0 0.0 1 31 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 0.25 1 2 18.5 12.5 1 6 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 0.5 2 3 13.0 12.832251036613439 1 2 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 0.75 2 4 21.25 18.102140757380052 1 46 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1.0 1.0 3 5 21.6 16.206171663906314 1 23 2
+PREHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+ rank() over(distribute by p_mfgr sort by p_name) as r,
+ dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2,
+first_value(p_size) over w1 as fv1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,p_name, p_size,
+ rank() over(distribute by p_mfgr sort by p_name) as r,
+ dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
+sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1,
+sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2,
+first_value(p_size) over w1 as fv1
+from part
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: cume_dist_window_2
+ arguments: _col1
+ name: cume_dist
+ window function: GenericUDAFCumeDistEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_3
+ arguments: _col5
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), sum_window_3 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int)
+ outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, sum_window_3, _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col5 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), sum_window_3 (type: bigint), _col1 (type: string)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 7
+ includeColumns: [0, 1, 2, 3, 4, 5, 6]
+ dataColumns: rank_window_0:int, dense_rank_window_1:int, cume_dist_window_2:double, sum_window_3:bigint, _col1:string, _col2:string, _col5:int
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col5 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int, _col2: double, _col3: bigint, _col5: string, _col6: string, _col9: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col9 ASC NULLS FIRST
+ partition by: _col6
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_4
+ arguments: _col9
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: RANGE PRECEDING(5)~CURRENT
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_4 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: bigint), _col5 (type: string), _col6 (type: string), _col9 (type: int)
+ outputColumnNames: sum_window_4, _col0, _col1, _col2, _col3, _col5, _col6, _col9
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+ Reduce Output Operator
+ key expressions: _col6 (type: string), _col5 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col6 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS
true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: sum_window_4 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: bigint), _col9 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7] + dataColumns: sum_window_4:bigint, _col0:int, _col1:int, _col2:double, _col3:bigint, _col5:string, _col6:string, _col9:int + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: double), VALUE._col4 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col8 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col10 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col1: int, _col2: int, _col3: double, _col4: bigint, _col6: string, _col7: string, _col10: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST, _col6 ASC NULLS FIRST + partition by: _col7 + raw input shape: + window functions: + window function definition + alias: first_value_window_5 + arguments: _col10 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col6 (type: string), _col10 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: bigint), _col0 (type: bigint), first_value_window_5 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 
as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r dr cud s1 s2 fv1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 38 34 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 44 10 2 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 72 28 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 114 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 54 40 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 56 2 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 81 25 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 99 32 2 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 17 31 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 31 14 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 50 50 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0.8 51 1 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 96 45 19 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 10 17 10 +Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 49 39 10 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 76 27 10 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 83 7 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 95 29 27 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 31 31 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 37 8 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 39 2 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 85 46 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1.0 108 23 2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows 
between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: count_window_0 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + isStar: true + window function definition + alias: count_window_1 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: count_window_0, count_window_1, _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: count_window_0:bigint, count_window_1:bigint, _col1:string, _col2:string, _col5:int + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col1: bigint, _col3: string, _col4: string, _col7: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST + partition by: _col4 + raw input shape: + window functions: + window function definition + alias: first_value_window_2 + arguments: _col7 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col3 (type: string), _col7 (type: int), _col0 (type: bigint), _col1 (type: bigint), first_value_window_2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 
+from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size c ca fvw1 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2 +Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 17 +Manufacturer#3 almond antique misty red olive 1 4 4 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 19 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10 +Manufacturer#4 almond antique violet mint lemon 39 2 2 10 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr 
(type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: min_window_1 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: max_window_2 + arguments: _col7 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: avg_window_3 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double), min_window_1 (type: double), max_window_2 (type: double), round(avg_window_3, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + 
Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s mi ma ag +Manufacturer#1 almond antique burnished rose metallic 2 4100.06 1173.15 1753.76 1366.69 +Manufacturer#1 almond antique burnished rose metallic 2 5702.65 1173.15 1753.76 1425.66 +Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.07 1173.15 1753.76 1423.41 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 7576.58 1173.15 1753.76 1515.32 +Manufacturer#1 almond aquamarine burnished black steel 28 6403.43 1414.42 1753.76 1600.86 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 4649.67 1414.42 1632.66 1549.89 +Manufacturer#2 almond antique violet chocolate turquoise 14 5523.36 1690.68 2031.98 1841.12 +Manufacturer#2 almond antique violet turquoise frosted 40 7222.02 1690.68 2031.98 1805.51 +Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 1690.68 2031.98 1784.72 +Manufacturer#2 almond aquamarine rose maroon antique 25 7232.94 1698.66 2031.98 1808.24 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5432.24 1698.66 2031.98 1810.75 +Manufacturer#3 almond antique chartreuse khaki white 17 4272.34 1190.27 1671.68 1424.11 +Manufacturer#3 almond antique forest lavender goldenrod 14 6195.32 1190.27 1922.98 1548.83 +Manufacturer#3 almond antique metallic orange dim 19 7532.61 1190.27 1922.98 1506.52 +Manufacturer#3 almond antique misty red olive 1 5860.93 1190.27 1922.98 1465.23 +Manufacturer#3 almond antique olive coral navajo 45 4670.66 1337.29 1922.98 1556.89 +Manufacturer#4 almond antique gainsboro frosted violet 10 4202.35 1206.26 1620.67 1400.78 +Manufacturer#4 almond antique violet mint lemon 39 6047.27 1206.26 1844.92 1511.82 +Manufacturer#4 almond aquamarine floral ivory bisque 27 7337.62 1206.26 1844.92 1467.52 +Manufacturer#4 almond aquamarine yellow dodger mint 7 5716.95 1206.26 1844.92 1429.24 +Manufacturer#4 almond azure aquamarine papaya violet 12 4341.53 1206.26 1844.92 1447.18 +Manufacturer#5 almond antique blue firebrick mint 31 5190.08 1611.66 1789.69 1730.03 +Manufacturer#5 almond antique medium spring khaki 6 6208.18 1018.1 1789.69 1552.05 +Manufacturer#5 almond antique sky peru orange 2 7672.66 1018.1 1789.69 1534.53 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 5882.97 1018.1 1788.73 1470.74 +Manufacturer#5 almond azure blanched chiffon midnight 23 4271.31 1018.1 1788.73 1423.77 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort 
by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + outputColumnNames: p_name, p_mfgr, p_size, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 5, 7] + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(p_retailprice), max(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 7) -> double, VectorUDAFMaxDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1, col 2, col 5, col 7 + native: false + projectedOutputColumns: [0, 1] + keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: double), _col5 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By 
Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: _col0:string, _col1:string, _col2:int, _col3:double, _col4:double, _col5:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col3: double, _col4: double, _col5: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col3 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: avg_window_1 + 
arguments: _col3 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), round(sum_window_0, 2) (type: double), _col4 (type: double), _col5 (type: double), round(avg_window_1, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size p_retailprice s mi ma ag +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.83 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.32 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.86 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.36 1690.68 1690.68 1841.12 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.51 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.72 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.94 1698.66 1698.66 1808.24 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.24 1701.6 1701.6 1810.75 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.11 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.52 +Manufacturer#3 almond antique misty red olive 1 1922.98 5860.93 1922.98 1922.98 1465.23 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.89 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.78 +Manufacturer#4 
almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.82 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.62 1206.26 1206.26 1467.52 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.95 1844.92 1844.92 1429.24 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.53 1290.35 1290.35 1447.18 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.03 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.05 +Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.53 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.97 1018.1 1018.1 1470.74 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.31 1464.48 1464.48 1423.77 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + 
enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: stddev_window_0 + arguments: _col7 + name: stddev + window function: GenericUDAFStdEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: stddev_pop_window_1 + arguments: _col7 + name: stddev_pop + window function: GenericUDAFStdEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: collect_set_window_2 + arguments: _col5 + name: collect_set + window function: GenericUDAFMkCollectionEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: variance_window_3 + arguments: _col7 + name: variance + window function: GenericUDAFVarianceEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: corr_window_4 + arguments: _col5, _col7 + name: corr + window function: GenericUDAFCorrelationEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: covar_pop_window_5 + arguments: _col5, _col7 + name: covar_pop + window function: GenericUDAFCovarianceEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), stddev_window_0 (type: double), stddev_pop_window_1 (type: double), collect_set_window_2 (type: array<int>), variance_window_3 (type: double), round(corr_window_4, 5) (type: double), covar_pop_window_5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +stddev(p_retailprice) over
w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size sdev sdev_pop uniq_size var cor covarp +Manufacturer#1 almond antique burnished rose metallic 2 258.10677784349235 258.10677784349235 [2,34,6] 66619.10876874991 0.81133 2801.7074999999995 +Manufacturer#1 almond antique burnished rose metallic 2 273.70217881648074 273.70217881648074 [2,34] 74912.8826888888 1.0 4128.782222222221 +Manufacturer#1 almond antique chartreuse lavender yellow 34 230.90151585470358 230.90151585470358 [2,34,6,28] 53315.51002399992 0.69564 2210.7864 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 202.73109328368946 202.73109328368946 [2,34,6,28,42] 41099.896184 0.63079 2009.9536000000007 +Manufacturer#1 almond aquamarine burnished black steel 28 121.6064517973862 121.6064517973862 [34,6,28,42] 14788.129118750014 0.20367 331.1337500000004 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 96.5751586416853 96.5751586416853 [6,28,42] 9326.761266666683 -1.4E-4 -0.20666666666708502 +Manufacturer#2 almond antique violet chocolate turquoise 14 142.2363169751898 142.2363169751898 [14,40,2] 20231.169866666663 -0.4937 -1113.7466666666658 +Manufacturer#2 almond antique violet turquoise frosted 40 137.76306498840682 137.76306498840682 [14,40,2,25] 18978.662075 -0.52056 -1004.4812499999995 +Manufacturer#2 almond aquamarine midnight light salmon 2 130.03972279269132 130.03972279269132 [14,40,2,25,18] 16910.329504000005 -0.46909 -766.1791999999995 +Manufacturer#2 almond aquamarine rose maroon antique 25 135.55100986344584 135.55100986344584 [40,2,25,18] 18374.07627499999 -0.60914 -1128.1787499999987 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 156.44019460768044 156.44019460768044 [2,25,18] 24473.534488888927 -0.95717 -1441.4466666666676 +Manufacturer#3 almond antique chartreuse khaki white 17 196.7742266885805 196.7742266885805 [17,14,19] 38720.09628888887 0.55572 224.6944444444446 +Manufacturer#3 almond antique forest lavender goldenrod 14 275.14144189852607 275.14144189852607 [17,14,19,1] 75702.81305 -0.67208 -1296.9000000000003 +Manufacturer#3 almond antique metallic orange dim 19 260.23473614412046 260.23473614412046 [17,14,19,1,45] 67722.117896 -0.57035 -2129.0664 +Manufacturer#3 almond antique misty red olive 1 275.9139962356932 275.9139962356932 [14,19,1,45] 76128.53331875012 -0.57748 -2547.7868749999993 +Manufacturer#3 almond antique olive coral navajo 45 260.5815918713796 260.5815918713796 [19,1,45] 67902.76602222225 -0.87107 -4099.731111111111 +Manufacturer#4 almond antique gainsboro frosted violet 10 170.13011889596618 170.13011889596618 [10,39,27] 28944.25735555559 -0.6657 -1347.4777777777779 +Manufacturer#4 almond antique violet mint lemon 39 242.26834609323197 242.26834609323197 [10,39,27,7] 58693.95151875002 -0.80519 -2537.328125 +Manufacturer#4 almond aquamarine floral ivory bisque 27 234.10001662537326 234.10001662537326 [10,39,27,7,12] 54802.817784000035 -0.60469 -1719.8079999999995 +Manufacturer#4 almond aquamarine yellow dodger mint 7 247.3342714197732 247.3342714197732 [39,27,7,12] 61174.24181875003 -0.55087 -1719.0368749999975 +Manufacturer#4 almond azure 
aquamarine papaya violet 12 283.3344330566893 283.3344330566893 [27,7,12] 80278.40095555557 -0.77557 -1867.4888888888881 +Manufacturer#5 almond antique blue firebrick mint 31 83.69879024746363 83.69879024746363 [31,6,2] 7005.487488888913 0.39004 418.9233333333353 +Manufacturer#5 almond antique medium spring khaki 6 316.68049612345885 316.68049612345885 [31,6,2,46] 100286.53662500004 -0.71361 -4090.853749999999 +Manufacturer#5 almond antique sky peru orange 2 285.40506298242155 285.40506298242155 [31,6,2,46,23] 81456.04997600002 -0.71286 -3297.2011999999986 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 285.43749038756283 285.43749038756283 [6,2,46,23] 81474.56091875004 -0.98413 -4871.028125000002 +Manufacturer#5 almond azure blanched chiffon midnight 23 315.9225931564038 315.9225931564038 [2,46,23] 99807.08486666664 -0.99789 -5664.856666666666 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + 
Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: histogram_numeric_window_0 + arguments: _col7, 5 + name: histogram_numeric + window function: GenericUDAFHistogramNumericEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: percentile_window_1 + arguments: _col0, 0.5 + name: percentile + window function: GenericUDAFBridgeEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: row_number_window_2 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), histogram_numeric_window_0 (type: array<struct<x:double,y:double>>), percentile_window_1 (type: double), row_number_window_2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size hist per rn +Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 115872.0 2 +Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1753.76,"y":1.0}] 121152.0 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 [{"x":1173.15,"y":2.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 110592.0 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6
[{"x":1173.15,"y":1.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86428.0 4 +Manufacturer#1 almond aquamarine burnished black steel 28 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86098.0 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0}] 86428.0 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 [{"x":1690.68,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 1 +Manufacturer#2 almond antique violet turquoise frosted 40 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 139825.5 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 169347.0 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 5 +Manufacturer#3 almond antique chartreuse khaki white 17 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0}] 90681.0 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 65831.5 2 +Manufacturer#3 almond antique metallic orange dim 19 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 90681.0 3 +Manufacturer#3 almond antique misty red olive 1 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 76690.0 4 +Manufacturer#3 almond antique olive coral navajo 45 [{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 112398.0 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0}] 48427.0 1 +Manufacturer#4 almond antique violet mint lemon 39 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 46844.0 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 45261.0 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1844.92,"y":1.0}] 39309.0 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1844.92,"y":1.0}] 33357.0 5 +Manufacturer#5 almond antique blue firebrick mint 31 [{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 155733.0 1 +Manufacturer#5 almond antique medium spring khaki 6 [{"x":1018.1,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 99201.0 2 +Manufacturer#5 almond antique sky peru orange 2 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 78486.0 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0}] 60577.5 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1788.73,"y":1.0}] 78486.0 5 +PREHOOK: query: explain vectorization detail +create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, 
p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand +PREHOOK: type: CREATEVIEW +POSTHOOK: query: explain vectorization detail +create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand +POSTHOOK: type: CREATEVIEW +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Create View Operator: + Create View + if not exists: true + or replace: false + columns: p_mfgr string, p_brand string, s double + expanded text: select `part`.`p_mfgr`, `part`.`p_brand`, +round(sum(`part`.`p_retailprice`),2) as `s` +from `default`.`part` +group by `part`.`p_mfgr`, `part`.`p_brand` + name: default.mfgr_price_view + original text: select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand + rewrite enabled: false + +PREHOOK: query: create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@mfgr_price_view +POSTHOOK: query: create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mfgr_price_view +POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_price_view.s EXPRESSION [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +p_mfgr p_brand s +PREHOOK: query: explain vectorization detail +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + properties: + insideView TRUE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Select Operator + expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) + outputColumnNames: p_mfgr, p_brand, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 7] + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true 
+ keyExpressions: col 2, col 3 + native: false + projectedOutputColumns: [0] + keys: p_mfgr (type: string), p_brand (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: round(_col2, 2) + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), round(_col2, 2) (type: double), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col3 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: _col0:string, _col1:string, _col2:double, _col3:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +PREHOOK: type: QUERY +PREHOOK: Input: default@mfgr_price_view +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mfgr_price_view +POSTHOOK: Input: default@part +#### A masked pattern was here #### +sq.p_mfgr sq.p_brand sq.s sq.s1 +Manufacturer#1 Brand#12 4800.84 8749.73 +Manufacturer#1 Brand#14 2346.3 8749.73 +Manufacturer#1 Brand#15 1602.59 8749.73 +Manufacturer#2 Brand#22 3491.38 8923.62 +Manufacturer#2 Brand#23 2031.98 8923.62 +Manufacturer#2 Brand#24 1698.66 8923.62 +Manufacturer#2 Brand#25 1701.6 8923.62 +Manufacturer#3 Brand#31 1671.68 7532.61 +Manufacturer#3 Brand#32 3333.37 7532.61 +Manufacturer#3 Brand#34 1337.29 7532.61 +Manufacturer#3 Brand#35 1190.27 7532.61 +Manufacturer#4 Brand#41 4755.94 7337.62 +Manufacturer#4 Brand#42 2581.68 7337.62 +Manufacturer#5 Brand#51 1611.66 7672.66 +Manufacturer#5 Brand#52 3254.17 7672.66 +Manufacturer#5 Brand#53 2806.83 7672.66 +PREHOOK: query: select p_mfgr, p_brand, s, +round(sum(s) over w1 ,2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@mfgr_price_view +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_brand, s, +round(sum(s) over w1 ,2) as s1 +from 
mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mfgr_price_view +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_brand s s1 +Manufacturer#1 Brand#12 4800.84 4800.84 +Manufacturer#1 Brand#14 2346.3 7147.14 +Manufacturer#1 Brand#15 1602.59 8749.73 +Manufacturer#2 Brand#22 3491.38 3491.38 +Manufacturer#2 Brand#23 2031.98 5523.36 +Manufacturer#2 Brand#24 1698.66 7222.02 +Manufacturer#2 Brand#25 1701.6 5432.24 +Manufacturer#3 Brand#31 1671.68 1671.68 +Manufacturer#3 Brand#32 3333.37 5005.05 +Manufacturer#3 Brand#34 1337.29 6342.34 +Manufacturer#3 Brand#35 1190.27 5860.93 +Manufacturer#4 Brand#41 4755.94 4755.94 +Manufacturer#4 Brand#42 2581.68 7337.62 +Manufacturer#5 Brand#51 1611.66 1611.66 +Manufacturer#5 Brand#52 3254.17 4865.83 +Manufacturer#5 Brand#53 2806.83 7672.66 +PREHOOK: query: explain vectorization detail +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +PREHOOK: type: CREATEVIEW +POSTHOOK: query: explain vectorization detail +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +POSTHOOK: type: CREATEVIEW +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Create View Operator: + Create View + if not exists: true + or replace: false + columns: p_mfgr string, p_brand string, s double + expanded text: select `part`.`p_mfgr`, `part`.`p_brand`, +round(sum(`part`.`p_retailprice`) over w1,2) as `s` +from `default`.`part` +window w1 as (distribute by `part`.`p_mfgr` sort by `part`.`p_name` rows between 2 preceding and current row) + name: default.mfgr_brand_price_view + original text: select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) + rewrite enabled: false + +PREHOOK: query: create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@mfgr_brand_price_view +POSTHOOK: query: create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mfgr_brand_price_view +POSTHOOK: Lineage: mfgr_brand_price_view.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_brand_price_view.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: mfgr_brand_price_view.s SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), 
(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +p_mfgr p_brand s +PREHOOK: query: explain vectorization detail +select * from mfgr_brand_price_view +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * from mfgr_brand_price_view +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + properties: + insideView TRUE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_brand (type: string), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col3, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col3: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(2)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col2 (type: string), _col3 (type: string), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from mfgr_brand_price_view +PREHOOK: type: QUERY +PREHOOK: Input: default@mfgr_brand_price_view +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from mfgr_brand_price_view +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mfgr_brand_price_view +POSTHOOK: Input: default@part +#### A masked pattern was here #### +mfgr_brand_price_view.p_mfgr mfgr_brand_price_view.p_brand mfgr_brand_price_view.s +Manufacturer#1 Brand#12 4100.06 +Manufacturer#1 Brand#12 4649.67 +Manufacturer#1 Brand#12 4770.77 +Manufacturer#1 Brand#14 1173.15 +Manufacturer#1 Brand#14 2346.3 +Manufacturer#1 Brand#15 4529.5 +Manufacturer#2 Brand#22 1690.68 +Manufacturer#2 Brand#22 3491.38 +Manufacturer#2 Brand#23 5523.36 +Manufacturer#2 Brand#24 5531.34 +Manufacturer#2 Brand#25 5432.24 +Manufacturer#3 Brand#31 1671.68 +Manufacturer#3 Brand#32 4272.34 +Manufacturer#3 Brand#32 4523.64 +Manufacturer#3 Brand#34 4670.66 +Manufacturer#3 Brand#35 2861.95 +Manufacturer#4 Brand#41 1620.67 +Manufacturer#4 Brand#41 4341.53 +Manufacturer#4 Brand#41 4426.6 +Manufacturer#4 Brand#42 2996.09 +Manufacturer#4 Brand#42 4202.35 +Manufacturer#5 Brand#51 3401.35 +Manufacturer#5 Brand#52 1789.69 +Manufacturer#5 Brand#52 4271.31 +Manufacturer#5 Brand#53 4418.49 +Manufacturer#5 Brand#53 5190.08 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int), array(1,2,3) (type: array<int>) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num
rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Select Operator + expressions: _col3 (type: array<int>) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Select expression for SELECT operator: Data type array<int> of GenericUDFArray(Const int 1, Const int 2, Const int 3) not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col4: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~CURRENT + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, 
array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name lv_col p_size s +Manufacturer#1 almond antique burnished rose metallic 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 2 4 +Manufacturer#1 almond antique burnished rose metallic 2 2 6 +Manufacturer#1 almond antique burnished rose metallic 2 2 6 +Manufacturer#1 almond antique burnished rose metallic 3 2 6 +Manufacturer#1 almond antique burnished rose metallic 3 2 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1 34 90 +Manufacturer#1 almond antique chartreuse lavender yellow 2 34 96 +Manufacturer#1 almond antique chartreuse lavender yellow 3 34 102 +Manufacturer#1 almond antique salmon chartreuse burlywood 1 6 10 +Manufacturer#1 almond antique salmon chartreuse burlywood 2 6 14 +Manufacturer#1 almond antique salmon chartreuse burlywood 3 6 18 +Manufacturer#1 almond aquamarine burnished black steel 1 28 40 +Manufacturer#1 almond aquamarine burnished black steel 2 28 62 +Manufacturer#1 almond aquamarine burnished black steel 3 28 84 +Manufacturer#1 almond aquamarine pink moccasin thistle 1 42 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 2 42 118 +Manufacturer#1 almond aquamarine pink moccasin thistle 3 42 126 +Manufacturer#2 almond antique violet chocolate turquoise 1 14 18 +Manufacturer#2 almond antique violet chocolate turquoise 2 14 30 +Manufacturer#2 almond antique violet chocolate turquoise 3 14 42 +Manufacturer#2 almond antique violet turquoise frosted 1 40 90 +Manufacturer#2 almond antique violet turquoise frosted 2 40 105 +Manufacturer#2 almond antique violet turquoise frosted 3 40 120 +Manufacturer#2 almond aquamarine midnight light salmon 1 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 4 +Manufacturer#2 almond aquamarine midnight light salmon 3 2 6 +Manufacturer#2 almond aquamarine rose maroon antique 1 25 61 +Manufacturer#2 almond aquamarine rose maroon antique 2 25 68 +Manufacturer#2 almond aquamarine rose maroon antique 3 25 75 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 18 46 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 2 18 50 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 3 18 54 +Manufacturer#3 almond antique chartreuse khaki white 1 17 45 +Manufacturer#3 almond antique chartreuse khaki white 2 17 48 +Manufacturer#3 almond antique chartreuse khaki white 3 17 51 +Manufacturer#3 almond antique forest lavender goldenrod 1 14 16 +Manufacturer#3 almond antique forest lavender goldenrod 2 14 29 +Manufacturer#3 almond antique forest lavender goldenrod 3 14 42 +Manufacturer#3 almond antique metallic orange dim 1 19 53 +Manufacturer#3 almond antique metallic orange dim 2 19 55 +Manufacturer#3 almond antique metallic orange dim 3 19 57 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique misty red olive 2 1 2 +Manufacturer#3 almond antique misty red olive 3 1 3 +Manufacturer#3 almond antique olive coral navajo 
1 45 83 +Manufacturer#3 almond antique olive coral navajo 2 45 109 +Manufacturer#3 almond antique olive coral navajo 3 45 135 +Manufacturer#4 almond antique gainsboro frosted violet 1 10 24 +Manufacturer#4 almond antique gainsboro frosted violet 2 10 27 +Manufacturer#4 almond antique gainsboro frosted violet 3 10 30 +Manufacturer#4 almond antique violet mint lemon 1 39 93 +Manufacturer#4 almond antique violet mint lemon 2 39 105 +Manufacturer#4 almond antique violet mint lemon 3 39 117 +Manufacturer#4 almond aquamarine floral ivory bisque 1 27 51 +Manufacturer#4 almond aquamarine floral ivory bisque 2 27 66 +Manufacturer#4 almond aquamarine floral ivory bisque 3 27 81 +Manufacturer#4 almond aquamarine yellow dodger mint 1 7 7 +Manufacturer#4 almond aquamarine yellow dodger mint 2 7 14 +Manufacturer#4 almond aquamarine yellow dodger mint 3 7 21 +Manufacturer#4 almond azure aquamarine papaya violet 1 12 32 +Manufacturer#4 almond azure aquamarine papaya violet 2 12 34 +Manufacturer#4 almond azure aquamarine papaya violet 3 12 36 +Manufacturer#5 almond antique blue firebrick mint 1 31 77 +Manufacturer#5 almond antique blue firebrick mint 2 31 85 +Manufacturer#5 almond antique blue firebrick mint 3 31 93 +Manufacturer#5 almond antique medium spring khaki 1 6 10 +Manufacturer#5 almond antique medium spring khaki 2 6 14 +Manufacturer#5 almond antique medium spring khaki 3 6 18 +Manufacturer#5 almond antique sky peru orange 1 2 2 +Manufacturer#5 almond antique sky peru orange 2 2 4 +Manufacturer#5 almond antique sky peru orange 3 2 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1 46 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 2 46 123 +Manufacturer#5 almond aquamarine dodger light gainsboro 3 46 138 +Manufacturer#5 almond azure blanched chiffon midnight 1 23 35 +Manufacturer#5 almond azure blanched chiffon midnight 2 23 52 +Manufacturer#5 almond azure blanched chiffon midnight 3 23 69 +PREHOOK: query: CREATE TABLE part_1( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_1 +POSTHOOK: query: CREATE TABLE part_1( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_1 +PREHOOK: query: CREATE TABLE part_2( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +cud INT, +s2 DOUBLE, +fv1 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_2 +POSTHOOK: query: CREATE TABLE part_2( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +cud INT, +s2 DOUBLE, +fv1 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_2 +PREHOOK: query: CREATE TABLE part_3( +p_mfgr STRING, +p_name STRING, +p_size INT, +c INT, +ca INT, +fv INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_3 +POSTHOOK: query: CREATE TABLE part_3( +p_mfgr STRING, +p_name STRING, +p_size INT, +c INT, +ca INT, +fv INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_3 +PREHOOK: query: explain vectorization detail +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between 
unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 + Stage-10 depends on stages: Stage-9 + Stage-2 depends on stages: Stage-10 + Stage-11 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value 
expressions: p_size (type: int), p_retailprice (type: double) + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: sum_window_2 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_1 + + Stage: Stage-0 + Move Operator + tables: + 
replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: cume_dist_window_2 + arguments: _col1 + name: cume_dist + window function: GenericUDAFCumeDistEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 
Basic stats: COMPLETE Column stats: NONE + value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: rank_window_0:int, dense_rank_window_1:int, cume_dist_window_2:double, _col1:string, _col2:string, _col5:int + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double), VALUE._col4 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: double, _col4: string, _col5: string, _col8: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col8 ASC NULLS FIRST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: sum_window_3 + arguments: _col8 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(5)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_3 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col4 (type: string), _col5 (type: string), _col8 (type: int) + outputColumnNames: sum_window_3, _col0, _col1, _col2, _col4, _col5, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Reduce Output Operator + key expressions: _col5 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col5 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: sum_window_3 (type: bigint), _col0 (type: int), _col1 (type: int), _col2 (type: double), _col8 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + 
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + includeColumns: [0, 1, 2, 3, 4, 5, 6] + dataColumns: sum_window_3:bigint, _col0:int, _col1:int, _col2:double, _col4:string, _col5:string, _col8:int + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: double), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col1: int, _col2: int, _col3: double, _col5: string, _col6: string, _col9: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col6 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col6 + raw input shape: + window functions: + window function definition + alias: first_value_window_4 + arguments: _col9 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col5 (type: string), _col9 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int), UDFToDouble(round(_col0, 1)) (type: double), first_value_window_4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_2 + + Stage: Stage-8 + Stats-Aggr Operator + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 
Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: count_window_0 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + isStar: true + window function definition + alias: count_window_1 + arguments: _col5 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: count_window_0, count_window_1, _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: count_window_0:bigint, count_window_1:bigint, _col1:string, _col2:string, _col5:int + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: bigint, _col1: bigint, _col3: string, _col4: string, _col7: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + 
order by: _col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST + partition by: _col4 + raw input shape: + window functions: + window function definition + alias: first_value_window_2 + arguments: _col7 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col3 (type: string), _col7 (type: int), UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), first_value_window_2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_3 + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_3 + + Stage: Stage-11 + Stats-Aggr Operator + +PREHOOK: query: from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Output: default@part_1 +PREHOOK: Output: default@part_2 +PREHOOK: Output: default@part_3 +POSTHOOK: query: from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) 
+INSERT OVERWRITE TABLE part_3
+select p_mfgr,p_name, p_size,
+count(*) over(distribute by p_mfgr sort by p_name) as c,
+count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
+first_value(p_size) over w1 as fv
+window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Output: default@part_1
+POSTHOOK: Output: default@part_2
+POSTHOOK: Output: default@part_3
+POSTHOOK: Lineage: part_1.dr SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_1.r SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_1.s SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.cud SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.dr SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.fv1 SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_2.r SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_2.s2 SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.c SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.ca SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.fv SCRIPT [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), (part)part.FieldSchema(name:p_name, type:string, comment:null), (part)part.FieldSchema(name:p_mfgr, type:string, comment:null), (part)part.FieldSchema(name:p_brand, type:string, comment:null), (part)part.FieldSchema(name:p_type, type:string, comment:null), (part)part.FieldSchema(name:p_size, type:int, comment:null), (part)part.FieldSchema(name:p_container, type:string, comment:null), (part)part.FieldSchema(name:p_retailprice, type:double, comment:null), (part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_3.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+_col0 _col1 _col2 _col3 _col4 _col5
+PREHOOK: query: select * from part_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_1
+#### A masked pattern was here ####
+part_1.p_mfgr part_1.p_name part_1.p_size part_1.r part_1.dr part_1.s
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
+Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
+Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
+Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
+Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
+Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
+Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
+PREHOOK: query: select * from part_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_2
+#### A masked pattern was here ####
+part_2.p_mfgr part_2.p_name part_2.p_size part_2.r part_2.dr part_2.cud part_2.s2 part_2.fv1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0 4.0 2
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 0 4.0 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0 34.0 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0 10.0 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0 28.0 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1 42.0 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0 14.0 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0 40.0 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0 2.0 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0 25.0 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1 32.0 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0 31.0 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0 14.0 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 0 50.0 17
+Manufacturer#3 almond antique misty red olive 1 4 4 0 1.0 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 1 45.0 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0 17.0 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 0 39.0 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0 27.0 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0 7.0 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1 29.0 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 0 31.0 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 0 8.0 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 0 2.0 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0 46.0 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1 23.0 2
+PREHOOK: query: select * from part_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_3
+#### A masked pattern was here ####
+part_3.p_mfgr part_3.p_name part_3.p_size part_3.c part_3.ca part_3.fv
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 2
+Manufacturer#1 almond antique burnished rose metallic 2 2 2 2
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2
+Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17
+Manufacturer#3 almond antique metallic orange dim 19 3 3 17
+Manufacturer#3 almond antique misty red olive 1 4 4 14
+Manufacturer#3 almond antique olive coral navajo 45 5 5 19
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10
+Manufacturer#4 almond antique violet mint lemon 39 2 2 10
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 31
+Manufacturer#5 almond antique medium spring khaki 6 2 2 31
+Manufacturer#5 almond antique sky peru orange 2 3 3 31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterLongColGreaterLongScalar(col 5, val 0) -> boolean
+              predicate: (p_size > 0) (type: boolean)
+              Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(p_retailprice)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinDouble(col 7) -> double
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 2, col 1, col 5
+                    native: false
+                    projectedOutputColumns: [0]
+                keys: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                  Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col3 (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: int, _col3: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_1 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: lag_window_2 + arguments: _col2, 1, _col2 + name: lag + window function: GenericUDAFLagEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr 
p_name p_size mi r dr p_size deltasz +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(10)~CURRENT + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE CURRENT~FOLLOWING(10) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s2 s1 
+Manufacturer#1 almond antique burnished rose metallic 2 4 10 +Manufacturer#1 almond antique burnished rose metallic 2 4 10 +Manufacturer#1 almond antique chartreuse lavender yellow 34 62 76 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 10 6 +Manufacturer#1 almond aquamarine burnished black steel 28 28 62 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 32 +Manufacturer#2 almond antique violet turquoise frosted 40 40 40 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 +Manufacturer#2 almond aquamarine rose maroon antique 25 43 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 43 +Manufacturer#3 almond antique chartreuse khaki white 17 31 36 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 50 +Manufacturer#3 almond antique metallic orange dim 19 50 19 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique olive coral navajo 45 45 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 17 22 +Manufacturer#4 almond antique violet mint lemon 39 39 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 29 +Manufacturer#4 almond azure aquamarine papaya violet 12 29 12 +Manufacturer#5 almond antique blue firebrick mint 31 54 31 +Manufacturer#5 almond antique medium spring khaki 6 8 6 +Manufacturer#5 almond antique sky peru orange 2 2 8 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 54 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: 
p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s +Manufacturer#1 almond antique burnished rose metallic 2 38 +Manufacturer#1 almond antique burnished rose metallic 2 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 +Manufacturer#4 almond antique violet mint lemon 39 83 +Manufacturer#4 almond aquamarine floral ivory 
bisque 27 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#5 almond antique blue firebrick mint 31 39 +Manufacturer#5 almond antique medium spring khaki 6 85 +Manufacturer#5 almond antique sky peru orange 2 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 
3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s +Manufacturer#1 almond antique burnished rose metallic 2 38 +Manufacturer#1 almond antique burnished rose metallic 2 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 +Manufacturer#4 almond antique violet mint lemon 39 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#5 almond antique blue firebrick mint 31 39 +Manufacturer#5 almond antique medium spring khaki 6 85 +Manufacturer#5 almond antique sky peru orange 2 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as 
s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s s2 +Manufacturer#1 almond antique burnished rose metallic 2 38 4 +Manufacturer#1 almond antique burnished rose metallic 2 44 4 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 44 +Manufacturer#1 almond aquamarine burnished black steel 28 110 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 114 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 +Manufacturer#2 almond antique violet turquoise frosted 40 81 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 99 +Manufacturer#3 almond antique chartreuse khaki white 17 50 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 31 +Manufacturer#3 almond antique metallic orange dim 19 96 50 +Manufacturer#3 almond antique misty red olive 1 79 51 +Manufacturer#3 almond antique olive coral navajo 45 65 96 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 10 +Manufacturer#4 almond antique violet mint lemon 39 83 49 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 76 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 83 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 95 +Manufacturer#5 almond antique blue firebrick mint 31 39 31 +Manufacturer#5 almond antique medium spring khaki 6 85 37 +Manufacturer#5 almond antique sky peru orange 2 108 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 108 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1 +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 34 
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 6 +Manufacturer#1 almond aquamarine burnished black steel 28 28 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 40 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 18 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 14 +Manufacturer#3 almond antique metallic orange dim 19 19 19 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique olive coral navajo 45 45 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 +Manufacturer#4 almond antique violet mint lemon 39 39 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 7 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 12 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 +Manufacturer#5 almond antique medium spring khaki 6 6 6 +Manufacturer#5 almond antique sky peru orange 2 2 2 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 23 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 
5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(2)~FOLLOWING(2) + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 +Manufacturer#1 almond antique burnished rose metallic 2 4 2 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 +Manufacturer#1 almond aquamarine burnished black steel 28 28 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 +Manufacturer#2 almond aquamarine sandy cyan 
gainsboro 18 18 99 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 +Manufacturer#3 almond antique metallic orange dim 19 19 50 +Manufacturer#3 almond antique misty red olive 1 1 51 +Manufacturer#3 almond antique olive coral navajo 45 45 96 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 +Manufacturer#4 almond antique violet mint lemon 39 39 49 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 +Manufacturer#5 almond antique medium spring khaki 6 6 37 +Manufacturer#5 almond antique sky peru orange 2 2 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(2)~FOLLOWING(2) + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint), sum_window_1 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 s3 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 4 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 4 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 44 +Manufacturer#1 almond aquamarine burnished black steel 28 28 72 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 114 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 99 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 17 +Manufacturer#3 almond antique 
forest lavender goldenrod 14 14 31 31 +Manufacturer#3 almond antique metallic orange dim 19 19 50 50 +Manufacturer#3 almond antique misty red olive 1 1 51 51 +Manufacturer#3 almond antique olive coral navajo 45 45 96 96 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 10 +Manufacturer#4 almond antique violet mint lemon 39 39 49 49 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 76 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 83 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 95 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 31 +Manufacturer#5 almond antique medium spring khaki 6 6 37 37 +Manufacturer#5 almond antique sky peru orange 2 2 39 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 85 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 108 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(2)~FOLLOWING(2) + window function definition + alias: sum_window_1 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: sum_window_2 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint), sum_window_1 (type: bigint), sum_window_2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 s3 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 38 +Manufacturer#1 almond antique burnished rose metallic 2 4 4 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 112 +Manufacturer#1 almond aquamarine burnished black steel 28 28 72 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 81 +Manufacturer#2 
almond aquamarine midnight light salmon 2 2 56 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 45 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 51 +Manufacturer#3 almond antique metallic orange dim 19 19 50 96 +Manufacturer#3 almond antique misty red olive 1 1 51 79 +Manufacturer#3 almond antique olive coral navajo 45 45 96 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 76 +Manufacturer#4 almond antique violet mint lemon 39 39 49 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 46 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 39 +Manufacturer#5 almond antique medium spring khaki 6 6 37 85 +Manufacturer#5 almond antique sky peru orange 2 2 39 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 85 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 108 71 +PREHOOK: query: explain vectorization detail +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select 
Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col5 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(2)~FOLLOWING(2) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: _col0:string, _col1:string, _col2:int, _col3:bigint + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data 
size: 1573 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s +Manufacturer#1 almond antique burnished rose metallic 2 38 +Manufacturer#1 almond antique burnished rose metallic 2 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 +Manufacturer#4 almond antique violet mint lemon 39 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#5 almond antique blue firebrick mint 31 39 +Manufacturer#5 almond antique medium spring khaki 6 85 +Manufacturer#5 almond antique sky peru orange 2 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: 
p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size r +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 7] + dataColumns: p_partkey:int, 
p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col6 (type: double) + outputColumnNames: _col2, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col7 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col7 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_4 + arguments: _col7 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), round(sum_window_0, 2) (type: double), min_window_1 (type: double), max_window_2 (type: double), round(avg_window_3, 2) (type: double), count_window_4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here 
#### +p_mfgr s1 s2 s3 s4 s5 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col2, _col1 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col7 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sum_window_0 (type: double), min_window_1 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + outputColumnNames: sum_window_0, min_window_1, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: sum_window_0 (type: double), min_window_1 (type: double), _col5 (type: int), _col7 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
true + rowBatchContext: + dataColumnCount: 6 + includeColumns: [0, 1, 2, 3, 4, 5] + dataColumns: sum_window_0:double, min_window_1:double, _col1:string, _col2:string, _col5:int, _col7:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: double), VALUE._col1 (type: double), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col5 (type: int), VALUE._col7 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7, _col9 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: double, _col1: double, _col3: string, _col4: string, _col7: int, _col9: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS FIRST + partition by: _col4, _col3 + raw input shape: + window functions: + window function definition + alias: max_window_2 + arguments: _col9 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col3 (type: string), _col7 (type: int), round(_col0, 2) (type: double), _col1 (type: double), max_window_2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 s2 s3 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 1753.76 1753.76 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 1602.59 1602.59 +Manufacturer#1 
almond aquamarine burnished black steel 28 1414.42 1414.42 1414.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 1632.66 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 1800.7 1800.7 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 2031.98 2031.98 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 1698.66 1698.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 1701.6 1701.6 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1671.68 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 1190.27 1190.27 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 1410.39 1410.39 +Manufacturer#3 almond antique misty red olive 1 1922.98 1922.98 1922.98 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 1337.29 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1620.67 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 1206.26 1206.26 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 1844.92 1844.92 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 1290.35 1290.35 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 1611.66 1611.66 +Manufacturer#5 almond antique sky peru orange 2 1788.73 1788.73 1788.73 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 1018.1 1018.1 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 1464.48 1464.48 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), substr(p_type, 2) (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_type (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 4] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: string + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col2: string, _col4: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: substr(_col4, 2) ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: substr(_col4, 2) + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col4 (type: string), substr(_col4, 2) (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_type short_ptype r +Manufacturer#1 LARGE BRUSHED STEEL ARGE BRUSHED STEEL 1 +Manufacturer#1 LARGE BURNISHED STEEL ARGE BURNISHED STEEL 2 +Manufacturer#1 PROMO BURNISHED NICKEL ROMO BURNISHED NICKEL 3 +Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 +Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 +Manufacturer#1 STANDARD ANODIZED STEEL TANDARD ANODIZED STEEL 6 +Manufacturer#2 ECONOMY POLISHED STEEL CONOMY POLISHED STEEL 1 +Manufacturer#2 MEDIUM ANODIZED COPPER EDIUM ANODIZED COPPER 2 +Manufacturer#2 MEDIUM BURNISHED COPPER EDIUM BURNISHED COPPER 3 +Manufacturer#2 SMALL POLISHED NICKEL MALL POLISHED NICKEL 4 +Manufacturer#2 STANDARD PLATED TIN TANDARD PLATED TIN 5 +Manufacturer#3 ECONOMY PLATED COPPER CONOMY PLATED COPPER 1 +Manufacturer#3 MEDIUM BURNISHED BRASS EDIUM BURNISHED BRASS 2 +Manufacturer#3 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#3 PROMO ANODIZED TIN ROMO ANODIZED TIN 4 +Manufacturer#3 STANDARD POLISHED STEEL TANDARD POLISHED STEEL 5 
+Manufacturer#4 ECONOMY BRUSHED COPPER CONOMY BRUSHED COPPER 1 +Manufacturer#4 PROMO POLISHED STEEL ROMO POLISHED STEEL 4 +Manufacturer#4 SMALL BRUSHED BRASS MALL BRUSHED BRASS 2 +Manufacturer#4 SMALL PLATED STEEL MALL PLATED STEEL 3 +Manufacturer#4 STANDARD ANODIZED TIN TANDARD ANODIZED TIN 5 +Manufacturer#5 ECONOMY BURNISHED STEEL CONOMY BURNISHED STEEL 2 +Manufacturer#5 LARGE BRUSHED BRASS ARGE BRUSHED BRASS 1 +Manufacturer#5 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#5 SMALL PLATED BRASS MALL PLATED BRASS 4 +Manufacturer#5 STANDARD BURNISHED TIN TANDARD BURNISHED TIN 5 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col2 + raw input 
shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col7 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +p_mfgr p_name p_size s1 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 5702.65 +Manufacturer#1 almond aquamarine burnished black steel 28 7117.07 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.73 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 5523.36 +Manufacturer#2 almond aquamarine rose maroon antique 25 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 4272.34 +Manufacturer#3 almond antique misty red olive 1 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 7337.62 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 3401.35 +Manufacturer#5 almond antique sky peru orange 2 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 7672.66 +PREHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded 
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1
+ from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1
+ from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Reduce Output Operator
+              key expressions: p_mfgr (type: string), p_size (type: int)
+              sort order: ++
+              Map-reduce partition columns: p_mfgr (type: string)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_name (type: string), p_retailprice (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [1, 2, 5, 7]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col5 (type: double)
+          outputColumnNames: _col1, _col2, _col5, _col7
+          Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: string, _col2: string, _col5: int, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col5 ASC NULLS FIRST
+                  partition by: _col2
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col7
+                        name: sum
+                        window function: GenericUDAFSumDouble
+                        window frame: RANGE PRECEDING(MAX)~CURRENT
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1
+ from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1
+ from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s1
+Manufacturer#1 almond antique burnished rose metallic 2 2346.3
+Manufacturer#1 almond antique burnished rose metallic 2 2346.3
+Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.07
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 3948.89
+Manufacturer#1 almond aquamarine burnished black steel 28 5363.31
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.73
+Manufacturer#2 almond antique violet chocolate turquoise 14 3722.66
+Manufacturer#2 almond antique violet turquoise frosted 40 8923.62
+Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98
+Manufacturer#2 almond aquamarine rose maroon antique 25 7122.92
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5424.26
+Manufacturer#3 almond antique chartreuse khaki white 17 4784.93
+Manufacturer#3 almond antique forest lavender goldenrod 14 3113.25
+Manufacturer#3 almond antique metallic orange dim 19 6195.32
+Manufacturer#3 almond antique misty red olive 1 1922.98
+Manufacturer#3 almond antique olive coral navajo 45 7532.61
+Manufacturer#4 almond antique gainsboro frosted violet 10 3465.59
+Manufacturer#4 almond antique violet mint lemon 39 7337.62
+Manufacturer#4 almond aquamarine floral ivory bisque 27 5962.2
+Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92
+Manufacturer#4 almond azure aquamarine papaya violet 12 4755.94
+Manufacturer#5 almond antique blue firebrick mint 31 6654.56
+Manufacturer#5 almond antique medium spring khaki 6 3400.39
+Manufacturer#5 almond antique sky peru orange 2 1788.73
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 7672.66
+Manufacturer#5 almond azure blanched chiffon midnight 23 4864.87
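[Editor's note — illustrative, not part of the patch: the two p_size = 2
rows above both report 2346.3 because a RANGE frame ending at CURRENT ROW
includes all peers (rows with an equal sort key), whereas a ROWS frame
counts physical rows. A sketch that computes both framings side by side:

  select p_mfgr, p_name, p_size,
         round(sum(p_retailprice) over
               (partition by p_mfgr order by p_size
                rows between unbounded preceding and current row), 2)  as s_rows,
         round(sum(p_retailprice) over
               (partition by p_mfgr order by p_size
                range between unbounded preceding and current row), 2) as s_range
  from part;
]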
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1
+ from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1
+ from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Reduce Output Operator
+              key expressions: p_mfgr (type: string), p_name (type: string)
+              sort order: ++
+              Map-reduce partition columns: p_mfgr (type: string)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_size (type: int), p_retailprice (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [1, 2, 5, 7]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+          outputColumnNames: _col1, _col2, _col5, _col7
+          Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: string, _col2: string, _col5: int, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: _col2
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col7
+                        name: sum
+                        window function: GenericUDAFSumDouble
+                        window frame: ROWS CURRENT~FOLLOWING(MAX)
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1
+ from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1
+ from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s1
+Manufacturer#1 almond antique burnished rose metallic 2 7576.58
+Manufacturer#1 almond antique burnished rose metallic 2 8749.73
+Manufacturer#1 almond antique chartreuse lavender yellow 34 6403.43
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4649.67
+Manufacturer#1 almond aquamarine burnished black steel 28 3047.08
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66
+Manufacturer#2 almond antique violet chocolate turquoise 14 8923.62
+Manufacturer#2 almond antique violet turquoise frosted 40 7232.94
+Manufacturer#2 almond aquamarine midnight light salmon 2 5432.24
+Manufacturer#2 almond aquamarine rose maroon antique 25 3400.26
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6
+Manufacturer#3 almond antique chartreuse khaki white 17 7532.61
+Manufacturer#3 almond antique forest lavender goldenrod 14 5860.93
+Manufacturer#3 almond antique metallic orange dim 19 4670.66
+Manufacturer#3 almond antique misty red olive 1 3260.27
+Manufacturer#3 almond antique olive coral navajo 45 1337.29
+Manufacturer#4 almond antique gainsboro frosted violet 10 7337.62
+Manufacturer#4 almond antique violet mint lemon 39 5716.95
+Manufacturer#4 almond aquamarine floral ivory bisque 27 4341.53
+Manufacturer#4 almond aquamarine yellow dodger mint 7 3135.27
+Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35
+Manufacturer#5 almond antique blue firebrick mint 31 7672.66
+Manufacturer#5 almond antique medium spring khaki 6 5882.97
+Manufacturer#5 almond antique sky peru orange 2 4271.31
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 2482.58
+Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48
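[Editor's note — illustrative, not part of the patch: a "rows between
current row and unbounded following" frame yields suffix (reverse running)
sums, so within each partition the earliest row carries close to the
partition total and the last row carries only its own price. Up to the
ordering of duplicate sort keys, this is equivalent to a running sum over
the reversed order:

  select p_mfgr, p_name,
         round(sum(p_retailprice) over
               (partition by p_mfgr
                order by p_name desc
                rows between unbounded preceding and current row), 2) as s1
  from part;
]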
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1
+ from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1
+ from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Reduce Output Operator
+              key expressions: p_mfgr (type: string), p_size (type: int)
+              sort order: ++
+              Map-reduce partition columns: p_mfgr (type: string)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_name (type: string), p_retailprice (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [1, 2, 5, 7]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col5 (type: double)
+          outputColumnNames: _col1, _col2, _col5, _col7
+          Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: string, _col2: string, _col5: int, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col5 ASC NULLS FIRST
+                  partition by: _col2
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col7
+                        name: sum
+                        window function: GenericUDAFSumDouble
+                        window frame: RANGE CURRENT~FOLLOWING(MAX)
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1
+ from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+ round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1
+ from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size s1
+Manufacturer#1 almond antique burnished rose metallic 2 8749.73
+Manufacturer#1 almond antique burnished rose metallic 2 8749.73
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3386.42
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 6403.43
+Manufacturer#1 almond aquamarine burnished black steel 28 4800.84
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66
+Manufacturer#2 almond antique violet chocolate turquoise 14 6891.64
+Manufacturer#2 almond antique violet turquoise frosted 40 1800.7
+Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62
+Manufacturer#2 almond aquamarine rose maroon antique 25 3499.36
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5200.96
+Manufacturer#3 almond antique chartreuse khaki white 17 4419.36
+Manufacturer#3 almond antique forest lavender goldenrod 14 5609.63
+Manufacturer#3 almond antique metallic orange dim 19 2747.68
+Manufacturer#3 almond antique misty red olive 1 7532.61
+Manufacturer#3 almond antique olive coral navajo 45 1337.29
+Manufacturer#4 almond antique gainsboro frosted violet 10 5492.7
+Manufacturer#4 almond antique violet mint lemon 39 1375.42
+Manufacturer#4 almond aquamarine floral ivory bisque 27 2581.68
+Manufacturer#4 almond aquamarine yellow dodger mint 7 7337.62
+Manufacturer#4 almond azure aquamarine papaya violet 12 3872.03
+Manufacturer#5 almond antique blue firebrick mint 31 2807.79
+Manufacturer#5 almond antique medium spring khaki 6 5883.93
+Manufacturer#5 almond antique sky peru orange 2 7672.66
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1
+Manufacturer#5 almond azure blanched chiffon midnight 23 4272.27
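[Editor's note — illustrative, not part of the patch: under "range between
current row and unbounded following" the frame starts at the first peer of
the current row, so both Manufacturer#1 rows with p_size = 2 report the full
partition total. That total can be cross-checked directly:

  select p_mfgr, round(sum(p_retailprice), 2) as total
  from part
  group by p_mfgr;
  -- e.g. Manufacturer#1 -> 8749.73, matching both p_size = 2 rows above
]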
+PREHOOK: query: explain vectorization detail
+select p_name, p_retailprice,
+round(avg(p_retailprice) over(),2)
+from part
+order by p_name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_name, p_retailprice,
+round(avg(p_retailprice) over(),2)
+from part
+order by p_name
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Reduce Output Operator
+              key expressions: 0 (type: int)
+              sort order: +
+              Map-reduce partition columns: 0 (type: int)
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: p_name (type: string), p_retailprice (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [1, 7]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: bigint, bigint
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col1 (type: string), VALUE._col7 (type: double)
+          outputColumnNames: _col1, _col7
+          Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: string, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: 0 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: avg_window_0
+                        arguments: _col7
+                        name: avg
+                        window function: GenericUDAFAverageEvaluatorDouble
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col7 (type: double), round(avg_window_0, 2) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2]
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 3
+              includeColumns: [0, 1, 2]
+              dataColumns: _col0:string, _col1:double, _col2:double
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: double)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_name, p_retailprice,
+round(avg(p_retailprice) over(),2)
+from part
+order by p_name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_name, p_retailprice,
+round(avg(p_retailprice) over(),2)
+from part
+order by p_name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_name p_retailprice _c2
+almond antique blue firebrick mint 1789.69 1546.78
+almond antique burnished rose metallic 1173.15 1546.78
+almond antique burnished rose metallic 1173.15 1546.78
+almond antique chartreuse khaki white 1671.68 1546.78
+almond antique chartreuse lavender yellow 1753.76 1546.78
+almond antique forest lavender goldenrod 1190.27 1546.78
+almond antique gainsboro frosted violet 1620.67 1546.78
+almond antique medium spring khaki 1611.66 1546.78
+almond antique metallic orange dim 1410.39 1546.78
+almond antique misty red olive 1922.98 1546.78
+almond antique olive coral navajo 1337.29 1546.78
+almond antique salmon chartreuse burlywood 1602.59 1546.78
+almond antique sky peru orange 1788.73 1546.78
+almond antique violet chocolate turquoise 1690.68 1546.78
+almond antique violet mint lemon 1375.42 1546.78
+almond antique violet turquoise frosted 1800.7 1546.78
+almond aquamarine burnished black steel 1414.42 1546.78
+almond aquamarine dodger light gainsboro 1018.1 1546.78
+almond aquamarine floral ivory bisque 1206.26 1546.78
+almond aquamarine midnight light salmon 2031.98 1546.78
+almond aquamarine pink moccasin thistle 1632.66 1546.78
+almond aquamarine rose maroon antique 1698.66 1546.78
+almond aquamarine sandy cyan gainsboro 1701.6 1546.78
+almond aquamarine yellow dodger mint 1844.92 1546.78
+almond azure aquamarine papaya violet 1290.35 1546.78
+almond azure blanched chiffon midnight 1464.48 1546.78
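[Editor's note — illustrative, not part of the patch: an empty OVER() spans
the entire input; the plan above encodes it as partition/order by the
constant 0 with a ROWS PRECEDING(MAX)~FOLLOWING(MAX) frame, which is why
every row shows the same 1546.78. A join-based sketch with the same result:

  select p.p_name, p.p_retailprice, round(a.avg_price, 2)
  from part p
  cross join (select avg(p_retailprice) as avg_price from part) a
  order by p.p_name;
]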
+PREHOOK: query: explain vectorization detail
+select p_mfgr,
+ sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row)
+from part
+where p_mfgr = 'Manufacturer#6'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr,
+ sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row)
+from part
+where p_mfgr = 'Manufacturer#6'
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val Manufacturer#6) -> boolean
+              predicate: (p_mfgr = 'Manufacturer#6') (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 'Manufacturer#6' (type: string), p_size (type: int)
+                sort order: ++
+                Map-reduce partition columns: 'Manufacturer#6' (type: string)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [2, 5]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: string, string
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col5
+          Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col5: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col5 ASC NULLS FIRST
+                  partition by: 'Manufacturer#6'
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col5
+                        name: sum
+                        window function: GenericUDAFSumLong
+                        window frame: ROWS PRECEDING(MAX)~CURRENT
+            Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: 'Manufacturer#6' (type: string), sum_window_0 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_mfgr,
+ sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row)
+from part
+where p_mfgr = 'Manufacturer#6'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr,
+ sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row)
+from part
+where p_mfgr = 'Manufacturer#6'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr sum_window_0
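[Editor's note — illustrative, not part of the patch: the result above is
header-only because, judging from the earlier result sets, part contains
only Manufacturer#1 through Manufacturer#5, so this query appears intended
to exercise the empty-input path of the windowing operator. A quick check:

  select count(*) from part where p_mfgr = 'Manufacturer#6';  -- expected: 0
]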
+PREHOOK: query: explain vectorization detail
+select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2),
+round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2)
+from part
+where p_mfgr='Manufacturer#1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2),
+round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2)
+from part
+where p_mfgr='Manufacturer#1'
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val Manufacturer#1) -> boolean
+              predicate: (p_mfgr = 'Manufacturer#1') (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 'Manufacturer#1' (type: string), p_name (type: string)
+                sort order: ++
+                Map-reduce partition columns: 'Manufacturer#1' (type: string)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                value expressions: p_retailprice (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [1, 2, 7]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: string, string
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: string), VALUE._col6 (type: double)
+          outputColumnNames: _col1, _col7
+          Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: string, _col7: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: 'Manufacturer#1'
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: avg_window_0
+                        arguments: _col7
+                        name: avg
+                        window function: GenericUDAFAverageEvaluatorDouble
+                        window frame: ROWS CURRENT~FOLLOWING(6)
+                      window function definition
+                        alias: sum_window_1
+                        arguments: _col7
+                        name: sum
+                        window function: GenericUDAFSumDouble
+                        window frame: ROWS CURRENT~FOLLOWING(6)
+            Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col7 (type: double), round(avg_window_0, 2) (type: double), round(sum_window_1, 2) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2),
+round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2)
+from part
+where p_mfgr='Manufacturer#1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2),
+round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2)
+from part
+where p_mfgr='Manufacturer#1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_retailprice _c1 _c2
+1173.15 1458.29 8749.73
+1173.15 1515.32 7576.58
+1414.42 1523.54 3047.08
+1602.59 1549.89 4649.67
+1632.66 1632.66 1632.66
+1753.76 1600.86 6403.43
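[Editor's note — illustrative, not part of the patch: Manufacturer#1 has
only six rows, so the first row's "current row and 6 following" frame covers
the whole partition: sum = 8749.73 and avg = 8749.73 / 6 = 1458.29, matching
the first result row. A direct cross-check:

  select round(sum(p_retailprice), 2)     as total,  -- 8749.73
         round(sum(p_retailprice) / 6, 2) as avg6    -- 1458.29
  from part
  where p_mfgr = 'Manufacturer#1';
]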
+PREHOOK: query: explain vectorization detail
+select sum(p_size) over (partition by p_mfgr )
+from part where p_mfgr = 'm1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select sum(p_size) over (partition by p_mfgr )
+from part where p_mfgr = 'm1'
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterStringGroupColEqualStringScalar(col 2, val m1) -> boolean
+              predicate: (p_mfgr = 'm1') (type: boolean)
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 'm1' (type: string)
+                sort order: +
+                Map-reduce partition columns: 'm1' (type: string)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                value expressions: p_size (type: int)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 9
+              includeColumns: [2, 5]
+              dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+              partitionColumnCount: 0
+              scratchColumnTypeNames: string, string
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col5 (type: int)
+          outputColumnNames: _col5
+          Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col5: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: 'm1' ASC NULLS FIRST
+                  partition by: 'm1'
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col5
+                        name: sum
+                        window function: GenericUDAFSumLong
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+            Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: sum_window_0 (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(p_size) over (partition by p_mfgr )
+from part where p_mfgr = 'm1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(p_size) over (partition by p_mfgr )
+from part where p_mfgr = 'm1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+sum_window_0
diff --git ql/src/test/results/clientpositive/windowing_gby2.q.out ql/src/test/results/clientpositive/windowing_gby2.q.out
index a17ad93..414d9a4 100644
--- ql/src/test/results/clientpositive/windowing_gby2.q.out
+++ ql/src/test/results/clientpositive/windowing_gby2.q.out
@@ -86,7 +86,7 @@ STAGE PLANS:
                         arguments: _col1
                         name: rank
                         window function: GenericUDAFRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
             Select Operator
@@ -211,7 +211,7 @@ STAGE PLANS:
                         arguments: _col0
                         name: avg
                         window function: GenericUDAFAverageEvaluatorDouble
-                        window frame: PRECEDING(MAX)~CURRENT
+                        window frame: RANGE PRECEDING(MAX)~CURRENT
             Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: avg_window_0 (type: double)
@@ -338,7 +338,7 @@ STAGE PLANS:
                         arguments: _col2
                         name: rank
                         window function: GenericUDAFRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
             Select Operator
@@ -385,7 +385,7 @@ STAGE PLANS:
                         arguments: _col4
                         name: dense_rank
                         window function: GenericUDAFDenseRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
             Select Operator
@@ -432,7 +432,7 @@ STAGE PLANS:
                         arguments: _col7
                         name: percent_rank
                         window function: GenericUDAFPercentRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
             Select Operator
@@ -611,7 +611,7 @@ STAGE PLANS:
                         arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
                         name: rank
                         window function: GenericUDAFRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
             Select Operator
diff --git ql/src/test/results/clientpositive/windowing_streaming.q.out ql/src/test/results/clientpositive/windowing_streaming.q.out
index a4bbef2..8d1071f 100644
--- ql/src/test/results/clientpositive/windowing_streaming.q.out
+++ ql/src/test/results/clientpositive/windowing_streaming.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
                         arguments: _col1
                         name: rank
                         window function: GenericUDAFRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Select Operator
@@ -162,7 +162,7 @@ STAGE PLANS:
                         arguments: _col1
                         name: rank
                         window function: GenericUDAFRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
@@ -362,7 +362,7 @@ STAGE PLANS:
                         arguments: _col5
                         name: rank
                         window function: GenericUDAFRankEvaluator
-                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                         isPivotResult: true
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Filter Operator