diff --git data/files/vector_ptf_part_simple.txt data/files/vector_ptf_part_simple.txt new file mode 100644 index 0000000..2bcc7a6 --- /dev/null +++ data/files/vector_ptf_part_simple.txt @@ -0,0 +1,40 @@ +Manufacturer#2 almond aquamarine rose maroon antique 900.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 +Manufacturer#1 almond aquamarine pink moccasin thistle \N +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#5 almond antique blue firebrick mint 1789.69 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique forest lavender goldenrod \N +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 +Manufacturer#4 almond antique violet mint lemon 1375.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#5 almond antique sky peru orange 1788.73 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique chartreuse khaki white 99.68 +Manufacturer#4 almond antique gainsboro frosted violet \N +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique olive coral navajo 1337.29 +Manufacturer#5 almond antique medium spring khaki 
1611.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 +Manufacturer#3 almond antique misty red olive 1922.98 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 +Manufacturer#4 almond aquamarine floral ivory bisque \N +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 +Manufacturer#3 almond antique metallic orange dim 55.39 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 8b04cd4..ac35f91 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -632,6 +632,13 @@ public void endGroup() throws HiveException { defaultEndGroup(); } + // Tell the operator the status of the next key-grouped VectorizedRowBatch that will be delivered + // to the process method. E.g. by reduce-shuffle. These semantics are needed by PTF so it can + // efficiently add computed values to the last batch of a group key. + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + // Do nothing. + } + // an blocking operator (e.g. 
GroupByOperator and JoinOperator) can // override this method to forward its outputs public void flush() throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java index afe1484..c5a4217 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc; import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator; @@ -139,6 +140,7 @@ vectorOpvec.put(FileSinkDesc.class, VectorFileSinkOperator.class); vectorOpvec.put(FilterDesc.class, VectorFilterOperator.class); vectorOpvec.put(LimitDesc.class, VectorLimitOperator.class); + vectorOpvec.put(PTFDesc.class, VectorPTFOperator.class); vectorOpvec.put(SparkHashTableSinkDesc.class, VectorSparkHashTableSinkOperator.class); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index d9caa47..db9bac6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -377,20 +377,6 @@ private boolean pushRecordVector() { BytesWritable keyWritable = (BytesWritable) reader.getCurrentKey(); valueWritables = reader.getCurrentValues(); - // Check if this is a new group or same group - if (handleGroupKey && !keyWritable.equals(this.groupKey)) { - // If a operator wants to do some work at the 
beginning of a group - if (groupKey == null) { // the first group - this.groupKey = new BytesWritable(); - } else { - // If a operator wants to do some work at the end of a group - reducer.endGroup(); - } - - groupKey.set(keyWritable.getBytes(), 0, keyWritable.getLength()); - reducer.startGroup(); - } - processVectorGroup(keyWritable, valueWritables, tag); return true; } catch (Throwable e) { @@ -406,15 +392,20 @@ private boolean pushRecordVector() { } /** + * + * @param keyWritable * @param values - * @return true if it is not done and can take more inputs + * @param tag + * @throws HiveException + * @throws IOException */ private void processVectorGroup(BytesWritable keyWritable, Iterable values, byte tag) throws HiveException, IOException { + Preconditions.checkState(batch.size == 0); + // Deserialize key into vector row columns. - // Since we referencing byte column vector byte arrays by reference, we don't need - // a data buffer. + // byte[] keyBytes = keyWritable.getBytes(); int keyLength = keyWritable.getLength(); @@ -440,6 +431,24 @@ private void processVectorGroup(BytesWritable keyWritable, int batchBytes = keyBytes.length; try { for (Object value : values) { + if (rowIdx >= maxSize || + (rowIdx > 0 && batchBytes >= BATCH_BYTES)) { + + // Batch is full AND we have at least 1 more row... + batch.size = rowIdx; + if (handleGroupKey) { + reducer.setNextVectorBatchGroupStatus(/* isLastGroupBatch */ false); + } + reducer.process(batch, tag); + + // Reset just the value columns and value buffer. + for (int i = firstValueColumnOffset; i < batch.numCols; i++) { + // Note that reset also resets the data buffer for bytes column vectors. + batch.cols[i].reset(); + } + rowIdx = 0; + batchBytes = keyBytes.length; + } if (valueLazyBinaryDeserializeToRow != null) { // Deserialize value into vector row columns. 
BytesWritable valueWritable = (BytesWritable) value; @@ -454,24 +463,13 @@ private void processVectorGroup(BytesWritable keyWritable, valueLazyBinaryDeserializeToRow.deserialize(batch, rowIdx); } rowIdx++; - if (rowIdx >= maxSize || batchBytes >= BATCH_BYTES) { - - // Batch is full. - batch.size = rowIdx; - reducer.process(batch, tag); - - // Reset just the value columns and value buffer. - for (int i = firstValueColumnOffset; i < batch.numCols; i++) { - // Note that reset also resets the data buffer for bytes column vectors. - batch.cols[i].reset(); - } - rowIdx = 0; - batchBytes = 0; - } } if (rowIdx > 0) { // Flush final partial batch. - VectorizedBatchUtil.setBatchSize(batch, rowIdx); + batch.size = rowIdx; + if (handleGroupKey) { + reducer.setNextVectorBatchGroupStatus(/* isLastGroupBatch */ true); + } reducer.process(batch, tag); } batch.reset(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 4b76d74..36d212d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -148,8 +148,7 @@ */ private static interface IProcessingMode { public void initialize(Configuration hconf) throws HiveException; - public void startGroup() throws HiveException; - public void endGroup() throws HiveException; + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException; public void processBatch(VectorizedRowBatch batch) throws HiveException; public void close(boolean aborted) throws HiveException; } @@ -159,14 +158,10 @@ */ private abstract class ProcessingModeBase implements IProcessingMode { - // Overridden and used in sorted reduce group batch processing mode. + // Overridden and used in ProcessingModeReduceMergePartial mode. @Override - public void startGroup() throws HiveException { - // Do nothing. 
- } - @Override - public void endGroup() throws HiveException { - // Do nothing. + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + throw new HiveException("Status call for next key-grouped VectorizedRowBatch not expected for ProcessingMode class " + this.getClass().getName()); } protected abstract void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, @@ -258,6 +253,11 @@ public void initialize(Configuration hconf) throws HiveException { } @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + // Do nothing. + } + + @Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { for (int i = 0; i < aggregators.length; ++i) { @@ -762,8 +762,8 @@ public void close(boolean aborted) throws HiveException { */ private class ProcessingModeReduceMergePartial extends ProcessingModeBase { - private boolean inGroup; private boolean first; + private boolean isLastGroupBatch; /** * The group vector key helper. @@ -782,7 +782,7 @@ public void close(boolean aborted) throws HiveException { @Override public void initialize(Configuration hconf) throws HiveException { - inGroup = false; + isLastGroupBatch = true; // We do not include the dummy grouping set column in the output. 
So we pass outputKeyLength // instead of keyExpressions.length @@ -794,24 +794,16 @@ public void initialize(Configuration hconf) throws HiveException { } @Override - public void startGroup() throws HiveException { - inGroup = true; - first = true; - } - - @Override - public void endGroup() throws HiveException { - if (inGroup && !first) { - writeGroupRow(groupAggregators, buffer); - groupAggregators.reset(); + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + if (this.isLastGroupBatch) { + first = true; } - inGroup = false; + this.isLastGroupBatch = isLastGroupBatch; } @Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { - assert(inGroup); if (first) { // Copy the group key to output batch now. We'll copy in the aggregates at the end of the group. first = false; @@ -828,11 +820,16 @@ public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(groupAggregators.getAggregationBuffer(i), batch); } + + if (isLastGroupBatch) { + writeGroupRow(groupAggregators, buffer); + groupAggregators.reset(); + } } @Override public void close(boolean aborted) throws HiveException { - if (!aborted && inGroup && !first) { + if (!aborted && !first && !isLastGroupBatch) { writeGroupRow(groupAggregators, buffer); } } @@ -1003,21 +1000,26 @@ private void changeToStreamingMode() throws HiveException { } @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + processingMode.setNextVectorBatchGroupStatus(isLastGroupBatch); + } + + @Override public void startGroup() throws HiveException { - processingMode.startGroup(); // We do not call startGroup on operators below because we are batching rows in // an output batch and the semantics will not work. 
// super.startGroup(); + throw new HiveException("Unexpected startGroup"); } @Override public void endGroup() throws HiveException { - processingMode.endGroup(); // We do not call endGroup on operators below because we are batching rows in // an output batch and the semantics will not work. // super.endGroup(); + throw new HiveException("Unexpected endGroup"); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 5c490ef..992cbce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -110,6 +110,14 @@ protected void initializeOp(Configuration hconf) throws HiveException { outputFieldNames, objectInspectors); } + // Must send on to VectorPTFOperator... + @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + for (Operator op : childOperators) { + op.setNextVectorBatchGroupStatus(isLastGroupBatch); + } + } + @Override public void process(Object row, int tag) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index c3940cb..a9aba56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -406,7 +406,7 @@ protected boolean needsImplicitCastForDecimal(GenericUDF udf) { return udfsNeedingImplicitDecimalCast.contains(udfClass); } - protected int getInputColumnIndex(String name) throws HiveException { + public int getInputColumnIndex(String name) throws HiveException { if (name == null) { throw new HiveException("Null column name"); } @@ -438,7 +438,7 @@ protected OutputColumnManager(int initialOutputCol) { private final Set 
usedOutputColumns = new HashSet(); - int allocateOutputColumn(TypeInfo typeInfo) throws HiveException { + int allocateOutputColumn(TypeInfo typeInfo) { if (initialOutputCol < 0) { // This is a test calling. return 0; @@ -499,7 +499,7 @@ void freeOutputColumn(int index) { } } - public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException { + public int allocateScratchColumn(TypeInfo typeInfo) { return ocm.allocateOutputColumn(typeInfo); } @@ -2635,7 +2635,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio } } - static String getScratchName(TypeInfo typeInfo) throws HiveException { + static String getScratchName(TypeInfo typeInfo) { // For now, leave DECIMAL precision/scale in the name so DecimalColumnVector scratch columns // don't need their precision/scale adjusted... if (typeInfo.getCategory() == Category.PRIMITIVE && diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java new file mode 100644 index 0000000..a094296 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * This is the vector PTF evaluator base class. + */ +public abstract class VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorBase.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private final WindowFrameDef windowFrameDef; + private final VectorExpression inputVecExpr; + protected final int inputColumnNum; + protected final int outputColumnNum; + + public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + this.windowFrameDef = windowFrameDef; + if (inputVecExpr == null) { + inputColumnNum = -1; + this.inputVecExpr = null; + } else { + inputColumnNum = inputVecExpr.getOutputColumn(); + if (inputVecExpr instanceof IdentityExpression) { + this.inputVecExpr = null; + } else { + this.inputVecExpr = inputVecExpr; + } + } + this.outputColumnNum = outputColumnNum; + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + if (inputVecExpr != null) { + inputVecExpr.evaluate(batch); + } + } + + public abstract void evaluateEndOfGroup(int groupCount); + + public boolean streamsResult() { + // Assume by default no. 
+ return false; + } + + public abstract boolean isGroupValueNull(); + + public abstract Type getColumnVectorType(); + + public long getLongGroupValue() { + throw new RuntimeException("No long group value evaluator implementation " + this.getClass().getName()); + } + + public double getDoubleGroupValue() { + throw new RuntimeException("No double group value evaluator implementation " + this.getClass().getName()); + } + + public HiveDecimalWritable getDecimalGroupValue() { + throw new RuntimeException("No decimal group value evaluator implementation " + this.getClass().getName()); + } + + public int getOutputColumnNum() { + return outputColumnNum; + } + + public abstract void resetEvaluator(); +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java new file mode 100644 index 0000000..61807cf --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates count() for a PTF group. + */ +public class VectorPTFEvaluatorCount extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorCount.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected long count; + + public VectorPTFEvaluatorCount(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Count non-null column rows; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + ColumnVector colVector = batch.cols[inputColumnNum]; + if (colVector.isRepeating) { + if (colVector.noNulls) { + count += size; + } + } else if (colVector.noNulls) { + count += size; + } else { + boolean[] batchIsNull = colVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long varCount = 1; + i++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varCount++; + } + } + count += varCount; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return false; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return count; + } + + @Override + public void resetEvaluator() { + count = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java new file mode 100644 index 0000000..b1ce512 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal avg() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalAvg extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalAvg.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected HiveDecimalWritable sum; + private int nonNullGroupCount; + private HiveDecimalWritable temp; + private HiveDecimalWritable avg; + + public VectorPTFEvaluatorDecimalAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + sum = new HiveDecimalWritable(); + temp = new HiveDecimalWritable(); + avg = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Sum double column for avg; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + temp.setFromLong(batch.size); + if (isNull) { + sum.set(decimalColVector.vector[0]); + sum.mutateMultiply(temp); + isNull = false; + } else { + temp.mutateMultiply(decimalColVector.vector[0]); + sum.mutateAdd(temp); + } + nonNullGroupCount += size; + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + sum.set(vector[0]); + isNull = false; + } else { + sum.mutateAdd(vector[0]); + } + for (int i = 1; i < size; i++) { + sum.mutateAdd(vector[i]); + } + nonNullGroupCount += size; + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + sum.set(vector[i++]); + isNull = false; + } else { + sum.mutateAdd(vector[i++]); + } + nonNullGroupCount++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum.mutateAdd(vector[i]); + nonNullGroupCount++; + } + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + if (!isNull) { + avg.set(sum); + temp.setFromLong(nonNullGroupCount); + avg.mutateDivide(temp); + } + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupValue() { + return avg; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum.set(HiveDecimal.ZERO); + nonNullGroupCount = 0; + avg.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java new file mode 100644 index 0000000..fea4612 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal first_value() for a PTF group. 
+ */ +public class VectorPTFEvaluatorDecimalFirstValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalFirstValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean haveFirstValue; + protected boolean isNull; + protected HiveDecimalWritable firstValue; + + public VectorPTFEvaluatorDecimalFirstValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + firstValue = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Capture first value; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (haveFirstValue) { + // First value already captured. + return; + } + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + firstValue.set(decimalColVector.vector[0]); + isNull = false; + } else { + isNull = true; + } + haveFirstValue = true; + } else if (decimalColVector.noNulls) { + firstValue.set(decimalColVector.vector[0]); + isNull = false; + haveFirstValue = true; + } else { + if (decimalColVector.isNull[0]) { + isNull = true; + } else { + firstValue.set(decimalColVector.vector[0]); + isNull = false; + } + haveFirstValue = true; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupValue() { + return firstValue; + } + + @Override + public void resetEvaluator() { + haveFirstValue = false; + isNull = true; + firstValue.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java new file mode 100644 index 0000000..2d46c83 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal last_value() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalLastValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalLastValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected HiveDecimalWritable lastValue; + + public VectorPTFEvaluatorDecimalLastValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + lastValue = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Capture last value; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (!isNull) { + return; + } + final int size = batch.size; + if (size == 0) { + return; + } + // Remember last value of each batch. 
+ DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + lastValue.set(decimalColVector.vector[0]); + isNull = false; + } + } else if (decimalColVector.noNulls) { + lastValue.set(decimalColVector.vector[size - 1]); + isNull = false; + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = size - 1; + while (batchIsNull[i]) { + if (--i < 0) { + return; + } + } + lastValue.set(decimalColVector.vector[i]); + isNull = false; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupValue() { + return lastValue; + } + + @Override + public void resetEvaluator() { + isNull = true; + lastValue.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java new file mode 100644 index 0000000..a2906af --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal max() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalMax extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalMax.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected HiveDecimalWritable max; + + public VectorPTFEvaluatorDecimalMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + max = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Max double column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + if (isNull) { + max.set(decimalColVector.vector[0]); + isNull = false; + } else { + HiveDecimalWritable repeatedMax = decimalColVector.vector[0]; + if (repeatedMax.compareTo(max) == 1) { + max.set(repeatedMax); + } + } + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + max.set(vector[0]); + isNull = false; + } else { + final HiveDecimalWritable dec = vector[0]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + for (int i = 1; i < size; i++) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + max.set(vector[i++]); + isNull = false; + } else { + final HiveDecimalWritable dec = vector[i++]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + for (; i < size; i++) { + if (!batchIsNull[i]) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(max) == 1) { + max.set(dec); + } + } + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupValue() { + return max; + } + + private static HiveDecimal MIN_VALUE = HiveDecimal.create("-99999999999999999999999999999999999999"); + + @Override + public void resetEvaluator() { + isNull = true; + max.set(MIN_VALUE); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java new file mode 100644 index 0000000..5197aae --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.FastHiveDecimal; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal min() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalMin extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalMin.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected HiveDecimalWritable min; + + public VectorPTFEvaluatorDecimalMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + min = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Min decimal column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + if (isNull) { + min.set(decimalColVector.vector[0]); + isNull = false; + } else { + HiveDecimalWritable repeatedMin = decimalColVector.vector[0]; + if (repeatedMin.compareTo(min) == -1) { + min.set(repeatedMin); + } + } + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + min.set(vector[0]); + isNull = false; + } else { + final HiveDecimalWritable dec = vector[0]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + for (int i = 1; i < size; i++) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + min.set(vector[i++]); + isNull = false; + } else { + final HiveDecimalWritable dec = vector[i++]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + for (; i < size; i++) { + if (!batchIsNull[i]) { + final HiveDecimalWritable dec = vector[i]; + if (dec.compareTo(min) == -1) { + min.set(dec); + } + } + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupValue() { + return min; + } + + private static HiveDecimal MAX_VALUE = HiveDecimal.create("99999999999999999999999999999999999999"); + + @Override + public void resetEvaluator() { + isNull = true; + min.set(MAX_VALUE); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java new file mode 100644 index 0000000..91f74ae --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal sum() for a PTF group. + */ +public class VectorPTFEvaluatorDecimalSum extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDecimalSum.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected HiveDecimalWritable sum; + protected HiveDecimalWritable temp; + + public VectorPTFEvaluatorDecimalSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + sum = new HiveDecimalWritable(); + temp = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Sum decimal column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + if (decimalColVector.isRepeating) { + if (decimalColVector.noNulls) { + temp.setFromLong(batch.size); + if (isNull) { + sum.set(decimalColVector.vector[0]); + sum.mutateMultiply(temp); + isNull = false; + } else { + temp.mutateMultiply(decimalColVector.vector[0]); + sum.mutateAdd(temp); + } + } + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + sum.set(vector[0]); + isNull = false; + } else { + sum.mutateAdd(vector[0]); + } + for (int i = 1; i < size; i++) { + sum.mutateAdd(vector[i]); + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + HiveDecimalWritable[] vector = decimalColVector.vector; + if (isNull) { + sum.set(vector[i++]); + isNull = false; + } else { + sum.mutateAdd(vector[i++]); + } + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum.mutateAdd(vector[i]); + } + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public HiveDecimalWritable getDecimalGroupValue() { + return sum; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum.set(HiveDecimal.ZERO);; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java new file mode 100644 index 0000000..e417418 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+
+/**
+ * This class evaluates dense_rank() for a PTF group.
+ */
+public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorPTFEvaluatorDenseRank.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  // Next dense rank to assign; advanced once per group in evaluateEndOfGroup.
+  private int denseRank;
+  // Dense rank of the group that just ended; returned by getLongGroupValue().
+  private int groupDenseRank;
+
+  public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    resetEvaluator();
+  }
+
+  public void evaluateGroupBatch(VectorizedRowBatch batch) {
+    super.evaluateGroupBatch(batch);
+
+    // Nothing to do.
+ } + + @Override + public void evaluateEndOfGroup(int groupCount) { + groupDenseRank = denseRank; + denseRank++; + } + + public boolean isGroupValueNull() { + return false; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return groupDenseRank; + } + + @Override + public void resetEvaluator() { + denseRank = 1; + groupDenseRank = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java new file mode 100644 index 0000000..d07c492 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double avg() for a PTF group. + */ +public class VectorPTFEvaluatorDoubleAvg extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleAvg.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected double sum; + private int nonNullGroupCount; + private double avg; + + public VectorPTFEvaluatorDoubleAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Sum double column for avg; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + if (isNull) { + sum = doubleColVector.vector[0] * batch.size; + isNull = false; + } else { + sum += doubleColVector.vector[0] * batch.size; + } + nonNullGroupCount += size; + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + nonNullGroupCount += size; + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varSum = vector[i++]; + nonNullGroupCount++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + nonNullGroupCount++; + } + } + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + if (!isNull) { + avg = sum / nonNullGroupCount; + } + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return avg; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0.0; + nonNullGroupCount = 0; + avg = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java new file mode 100644 index 0000000..593bc87 --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double first_value() for a PTF group. 
+ */
+public class VectorPTFEvaluatorDoubleFirstValue extends VectorPTFEvaluatorBase {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorPTFEvaluatorDoubleFirstValue.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  // True once the group's first row has been examined; later batches are then ignored.
+  protected boolean haveFirstValue;
+  // Null-ness and value of the group's first row; only meaningful once haveFirstValue is true.
+  protected boolean isNull;
+  protected double firstValue;
+
+  public VectorPTFEvaluatorDoubleFirstValue(WindowFrameDef windowFrameDef,
+      VectorExpression inputVecExpr, int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    resetEvaluator();
+  }
+
+  public void evaluateGroupBatch(VectorizedRowBatch batch) {
+    super.evaluateGroupBatch(batch);
+
+    // Capture first value; maintain isNull.  (Comment fixed: previously said "Max double
+    // column", a copy/paste left-over from the max evaluator.)
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    if (haveFirstValue) {
+      // First value already captured.
+      return;
+    }
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
+    if (doubleColVector.isRepeating) {
+      if (doubleColVector.noNulls) {
+        firstValue = doubleColVector.vector[0];
+        isNull = false;
+      } else {
+        // A repeating batch without noNulls is treated as all-null.
+        isNull = true;
+      }
+      haveFirstValue = true;
+    } else if (doubleColVector.noNulls) {
+      firstValue = doubleColVector.vector[0];
+      isNull = false;
+      haveFirstValue = true;
+    } else {
+      // Respect the null-ness of row 0: first_value() does not skip leading NULLs here.
+      if (doubleColVector.isNull[0]) {
+        isNull = true;
+      } else {
+        firstValue = doubleColVector.vector[0];
+        isNull = false;
+      }
+      haveFirstValue = true;
+    }
+  }
+
+  @Override
+  public void evaluateEndOfGroup(int groupCount) {
+    // Nothing to do.
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return firstValue; + } + + @Override + public void resetEvaluator() { + haveFirstValue = false; + isNull = true; + firstValue = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java new file mode 100644 index 0000000..0daff19 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates double last_value() for a PTF group.
+ * (Javadoc fixed: previously said first_value(), a copy/paste left-over.)
+ */
+public class VectorPTFEvaluatorDoubleLastValue extends VectorPTFEvaluatorBase {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorPTFEvaluatorDoubleLastValue.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  // Null-ness and value of the last row seen so far in the current group.
+  protected boolean isNull;
+  protected double lastValue;
+
+  public VectorPTFEvaluatorDoubleLastValue(WindowFrameDef windowFrameDef,
+      VectorExpression inputVecExpr, int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    resetEvaluator();
+  }
+
+  public void evaluateGroupBatch(VectorizedRowBatch batch) {
+    super.evaluateGroupBatch(batch);
+
+    // Capture last value; maintain isNull.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    // Remember last value of each batch.
+ DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + lastValue = doubleColVector.vector[0]; + isNull = false; + } + } else if (doubleColVector.noNulls) { + lastValue = doubleColVector.vector[size - 1]; + isNull = false; + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = size - 1; + while (batchIsNull[i]) { + if (--i < 0) { + return; + } + } + lastValue = doubleColVector.vector[i]; + isNull = false; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return lastValue; + } + + @Override + public void resetEvaluator() { + isNull = true; + lastValue = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java new file mode 100644 index 0000000..044b740 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double max() for a PTF group. + */ +public class VectorPTFEvaluatorDoubleMax extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleMax.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected double max; + + public VectorPTFEvaluatorDoubleMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Max double column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + if (isNull) { + max = doubleColVector.vector[0]; + isNull = false; + } else { + final double repeatedMax = doubleColVector.vector[0]; + if (repeatedMax < max) { + max = repeatedMax; + } + } + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varMax = vector[0]; + for (int i = 1; i < size; i++) { + final double d = vector[i]; + if (d > varMax) { + varMax = d; + } + } + if (isNull) { + max = varMax; + isNull = false; + } else if (varMax > max) { + max = varMax; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varMax = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double d = vector[i]; + if (d > varMax) { + varMax = d; + } + } + } + if (isNull) { + max = varMax; + isNull = false; + } else if (varMax > max) { + max = varMax; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return max; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = Double.MIN_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java new file mode 100644 index 0000000..03eec42 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double min() for a PTF group. + */ +public class VectorPTFEvaluatorDoubleMin extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleMin.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected double min; + + public VectorPTFEvaluatorDoubleMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Min double column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + if (isNull) { + min = doubleColVector.vector[0]; + isNull = false; + } else { + final double repeatedMin = doubleColVector.vector[0]; + if (repeatedMin < min) { + min = repeatedMin; + } + } + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varMin = vector[0]; + for (int i = 1; i < size; i++) { + final double d = vector[i]; + if (d < varMin) { + varMin = d; + } + } + if (isNull) { + min = varMin; + isNull = false; + } else if (varMin < min) { + min = varMin; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varMin = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double d = vector[i]; + if (d < varMin) { + varMin = d; + } + } + } + if (isNull) { + min = varMin; + isNull = false; + } else if (varMin < min) { + min = varMin; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return min; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = Double.MAX_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java new file mode 100644 index 0000000..7387f9e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double sum() for a PTF group. + */ +public class VectorPTFEvaluatorDoubleSum extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorDoubleSum.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected double sum; + + public VectorPTFEvaluatorDoubleSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Sum double column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + if (doubleColVector.isRepeating) { + if (doubleColVector.noNulls) { + if (isNull) { + sum = doubleColVector.vector[0] * batch.size; + isNull = false; + } else { + sum += doubleColVector.vector[0] * batch.size; + } + } + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + double varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + double[] vector = doubleColVector.vector; + double varSum = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + } + } + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return sum; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java new file mode 100644 index 0000000..860f434 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long avg() for a PTF group. 
+ */ +public class VectorPTFEvaluatorLongAvg extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongAvg.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected long sum; + private int nonNullGroupCount; + private double avg; + + public VectorPTFEvaluatorLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Sum long column for avg; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + if (isNull) { + sum = longColVector.vector[0] * batch.size; + isNull = false; + } else { + sum += longColVector.vector[0] * batch.size; + } + nonNullGroupCount += size; + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + nonNullGroupCount += size; + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varSum = vector[i++]; + nonNullGroupCount++; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + nonNullGroupCount++; + } + } + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + 
if (!isNull) { + avg = ((double) sum) / nonNullGroupCount; + } + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public double getDoubleGroupValue() { + return avg; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + nonNullGroupCount = 0; + avg = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java new file mode 100644 index 0000000..4ce0fdc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long first_value() for a PTF group. + */ +public class VectorPTFEvaluatorLongFirstValue extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongFirstValue.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean haveFirstValue; + protected boolean isNull; + protected long firstValue; + + public VectorPTFEvaluatorLongFirstValue(WindowFrameDef windowFrameDef, + VectorExpression inputVecExpr, int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Capture first long; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + if (haveFirstValue) { + // First value already captured. 
+ return; + } + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + firstValue = longColVector.vector[0]; + isNull = false; + } else { + isNull = true; + } + haveFirstValue = true; + } else if (longColVector.noNulls) { + firstValue = longColVector.vector[0]; + isNull = false; + haveFirstValue = true; + } else { + if (longColVector.isNull[0]) { + isNull = true; + } else { + firstValue = longColVector.vector[0]; + isNull = false; + } + haveFirstValue = true; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return firstValue; + } + + @Override + public void resetEvaluator() { + haveFirstValue = false; + isNull = true; + firstValue = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java new file mode 100644 index 0000000..7bd4a9e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates long last_value() for a PTF group.
+ */
+public class VectorPTFEvaluatorLongLastValue extends VectorPTFEvaluatorBase {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorPTFEvaluatorLongLastValue.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  protected boolean isNull;
+  protected long lastValue;
+
+  public VectorPTFEvaluatorLongLastValue(WindowFrameDef windowFrameDef,
+      VectorExpression inputVecExpr, int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    resetEvaluator();
+  }
+
+  public void evaluateGroupBatch(VectorizedRowBatch batch) {
+    super.evaluateGroupBatch(batch);
+
+    // Capture last long; maintain isNull.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    // Remember last value of each batch.
+ LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + lastValue = longColVector.vector[0]; + isNull = false; + } + } else if (longColVector.noNulls) { + lastValue = longColVector.vector[size - 1]; + isNull = false; + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = size - 1; + while (batchIsNull[i]) { + if (--i < 0) { + return; + } + } + lastValue = longColVector.vector[i]; + isNull = false; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return lastValue; + } + + @Override + public void resetEvaluator() { + isNull = true; + lastValue = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java new file mode 100644 index 0000000..bf90773 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long max() for a PTF group. + */ +public class VectorPTFEvaluatorLongMax extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongMax.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected long max; + + public VectorPTFEvaluatorLongMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Max long column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + if (isNull) { + max = longColVector.vector[0]; + isNull = false; + } else { + final long repeatedMax = longColVector.vector[0]; + if (repeatedMax > max) { + max = repeatedMax; + } + } + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varMax = vector[0]; + for (int i = 1; i < size; i++) { + final long l = vector[i]; + if (l > varMax) { + varMax = l; + } + } + if (isNull) { + max = varMax; + isNull = false; + } else if (varMax > max) { + max = varMax; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varMax = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long l = vector[i]; + if (l > varMax) { + varMax = l; + } + } + } + if (isNull) { + max = varMax; + isNull = false; + } else if (varMax > max) { + max = varMax; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return max; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = Long.MIN_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java new file mode 100644 index 0000000..f2513fd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long min() for a PTF group. + */ +public class VectorPTFEvaluatorLongMin extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongMin.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected long min; + + public VectorPTFEvaluatorLongMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Min long column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + if (isNull) { + min = longColVector.vector[0]; + isNull = false; + } else { + final long repeatedMin = longColVector.vector[0]; + if (repeatedMin < min) { + min = repeatedMin; + } + } + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varMin = vector[0]; + for (int i = 1; i < size; i++) { + final long l = vector[i]; + if (l < varMin) { + varMin = l; + } + } + if (isNull) { + min = varMin; + isNull = false; + } else if (varMin < min) { + min = varMin; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varMin = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long l = vector[i]; + if (l < varMin) { + varMin = l; + } + } + } + if (isNull) { + min = varMin; + isNull = false; + } else if (varMin < min) { + min = varMin; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. 
+ } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return min; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = Long.MAX_VALUE; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java new file mode 100644 index 0000000..5bfb93f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long sum() for a PTF group. + */ +public class VectorPTFEvaluatorLongSum extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorLongSum.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + protected boolean isNull; + protected long sum; + + public VectorPTFEvaluatorLongSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Sum long column; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + if (longColVector.isRepeating) { + if (longColVector.noNulls) { + if (isNull) { + sum = longColVector.vector[0] * batch.size; + isNull = false; + } else { + sum += longColVector.vector[0] * batch.size; + } + } + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + long varSum = vector[0]; + for (int i = 1; i < size; i++) { + varSum += vector[i]; + } + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (++i >= size) { + return; + } + } + long[] vector = longColVector.vector; + long varSum = vector[i++]; + for (; i < size; i++) { + if (!batchIsNull[i]) { + varSum += vector[i]; + } + } + if (isNull) { + sum = varSum; + isNull = false; + } else { + sum += varSum; + } + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do. + } + + @Override + public boolean isGroupValueNull() { + return isNull; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return sum; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java new file mode 100644 index 0000000..f5727ec --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +/** + * This class evaluates rank() for a PTF group. + */ +public class VectorPTFEvaluatorRank extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorRank.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private int rank; + private int groupRank; + + public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + // Nothing to do. 
+ } + + @Override + public void evaluateEndOfGroup(int groupCount) { + groupRank = rank; + rank += groupCount; + } + + public boolean isGroupValueNull() { + return false; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public long getLongGroupValue() { + return groupRank; + } + + @Override + public void resetEvaluator() { + rank = 1; + groupRank = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java new file mode 100644 index 0000000..942f043 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +/** + * This class evaluates row_number() for a PTF group. + */ +public class VectorPTFEvaluatorRowNumber extends VectorPTFEvaluatorBase { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFEvaluatorRowNumber.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private int rowNumber; + + public VectorPTFEvaluatorRowNumber(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch) { + super.evaluateGroupBatch(batch); + + final int size = batch.size; + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + vector[i] = rowNumber++; + } + } + + @Override + public void evaluateEndOfGroup(int groupCount) { + // Nothing to do -- we stream the row number in with evaluateGroupBatch. + } + + public boolean streamsResult() { + // No group value. 
+ return true; + } + + public boolean isGroupValueNull() { + return false; + } + + @Override + public Type getColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + rowNumber = 1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java new file mode 100644 index 0000000..2883c96 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.ptf;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;

import com.google.common.base.Preconditions;

/**
 * This class encapsulates the one or more VectorizedRowBatch objects of a PTF group.
 *
 * It drives the per-batch accumulation of the group's evaluators and, when the group
 * ends, fills the group result into each evaluator's output column and forwards the
 * batch through the owning VectorPTFOperator.
 */
public class VectorPTFGroupBatches {

  private static final long serialVersionUID = 1L;
  private static final String CLASS_NAME = VectorPTFGroupBatches.class.getName();
  private static final Log LOG = LogFactory.getLog(CLASS_NAME);

  // Number of rows accumulated so far for the current group (across batches).
  private int groupRowCount;
  // The evaluators (sum, min, rank, ...) shared with the operator; set by init().
  private VectorPTFEvaluatorBase[] evaluators;
  // Reference to the most recent group batch.  Only valid while a group fits in a
  // single batch -- see the UNDONE notes below about multi-batch groups.
  private VectorizedRowBatch tempRefVrb;

  public VectorPTFGroupBatches() {
    groupRowCount = 0;
  }

  /**
   * Supply the evaluator array.  Must be called once before any batches are processed.
   */
  public void init(VectorPTFEvaluatorBase[] evaluators) {
    this.evaluators = evaluators;
  }

  /**
   * Accumulate one batch of the current group into every evaluator and count its rows.
   */
  public void evaluateGroupBatch(VectorizedRowBatch batch) {
    for (VectorPTFEvaluatorBase evaluator : evaluators) {
      evaluator.evaluateGroupBatch(batch);
    }
    groupRowCount += batch.size;

    // UNDONE: Until we handle multiple VRB.
    tempRefVrb = batch;
  }

  /**
   * Finish the group: tell each evaluator the group row count, write each evaluator's
   * group value as a repeated value into its output column, and forward the batch.
   *
   * @param vecPTFOperator the operator used to forward the filled batch downstream
   * @throws HiveException if forwarding fails
   */
  public void fillGroupResultsAndForward(VectorPTFOperator vecPTFOperator) throws HiveException {
    for (VectorPTFEvaluatorBase evaluator : evaluators) {
      evaluator.evaluateEndOfGroup(groupRowCount);
    }
    groupRowCount = 0;

    // UNDONE: For now, just one VRB.
    VectorizedRowBatch batch = tempRefVrb;
    for (VectorPTFEvaluatorBase evaluator : evaluators) {
      final int outputColumnNum = evaluator.getOutputColumnNum();
      if (evaluator.streamsResult()) {
        // Streaming evaluators (e.g. row_number) already wrote per-row output.
        continue;
      }
      // The whole group shares one value, so mark the column repeating and set entry 0.
      final ColumnVector outputColVector = batch.cols[outputColumnNum];
      outputColVector.isRepeating = true;
      final boolean isGroupValueNull = evaluator.isGroupValueNull();
      outputColVector.isNull[0] = isGroupValueNull;
      if (isGroupValueNull) {
        outputColVector.noNulls = false;
      } else {
        outputColVector.noNulls = true;
        switch (evaluator.getColumnVectorType()) {
        case LONG:
          ((LongColumnVector) outputColVector).vector[0] = evaluator.getLongGroupValue();
          break;
        case DOUBLE:
          ((DoubleColumnVector) outputColVector).vector[0] = evaluator.getDoubleGroupValue();
          break;
        case DECIMAL:
          ((DecimalColumnVector) outputColVector).vector[0].set(evaluator.getDecimalGroupValue());
          break;
        default:
          throw new RuntimeException("Unexpected column vector type " + evaluator.getColumnVectorType());
        }
      }
    }
    vecPTFOperator.forward(batch, null);

    // for each group batch (oldest first, may be from temporary storage):
    //   for each evaluator:
    //     groupValue = evaluator.getGroupValue();
    //     repeated fill output column
    //   forward batch
    //   if batch from pool, reset and return to pool
  }

  /**
   * Reset all evaluators for a new partition.  Must only be called between groups
   * (asserted via groupRowCount == 0).
   */
  public void resetEvaluators() {
    for (VectorPTFEvaluatorBase evaluator : evaluators) {
      evaluator.resetEvaluator();
    }
    Preconditions.checkState(groupRowCount == 0);
  }

  /**
   * Buffer a non-final batch of a multi-batch group.  Not implemented yet -- groups
   * spanning more than one batch currently fail fast here.
   */
  public void bufferGroupBatch(VectorizedRowBatch batch) {
    // UNDONE: Copy relevant batch columns to batch from pool.
    // UNDONE: Or, if pool is used up, write batch to temporary storage.
    throw new RuntimeException("bufferGroupBatch not implemented yet (size " + batch.size + ")");
  }
}
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc; +import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; +import org.apache.hadoop.hive.ql.plan.VectorPTFInfo.SupportedFunctionType; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * This class is native vectorized PTF operator class. + */ +public class VectorPTFOperator extends Operator + implements VectorizationContextRegion { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorPTFOperator.class.getName(); + private static final Log LOG = LogFactory.getLog(CLASS_NAME); + + private VectorPTFDesc vectorDesc; + + /** + * Information about our native vectorized PTF created by the Vectorizer class during + * it decision process and useful for execution. + */ + private VectorPTFInfo vectorPTFInfo; + + private VectorizationContext vContext; + + private boolean isPartitionOrderBy; + + /** + * PTF vector expressions. + */ + + // This is map of which vectorized row batch columns are the input columns and the group value + // (aggregation) output columns. + // And, their types. + private int[] outputColumnMap; + private TypeInfo[] outputTypeInfos; + + private String[] evaluatorFunctionNames; + private WindowFrameDef[] evaluatorWindowFrameDefs; + private ExprNodeDesc[] evaluatorInputExprNodeDescs; + + private ExprNodeDesc[] partitionExprNodeDescs; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient boolean isLastGroupBatch; + + private transient VectorPTFGroupBatches groupBatches; + + private transient int[] partitionColumnMap; + + // Optional vectorized key expressions that need to be run on each batch. 
+ private transient Type[] partitionColumnVectorTypes; + private transient VectorExpression[] partitionExpressions; + + private transient boolean isFirstPartition; + + private transient boolean[] currentPartitionIsNull; + private transient long[] currentPartitionLongs; + private transient double[] currentPartitionDoubles; + private transient byte[][] currentPartitionByteArrays; + private transient int[] currentPartitionByteLengths; + private transient HiveDecimalWritable[] currentPartitionDecimals; + + // For debug tracing: the name of the map or reduce task. + private transient String taskName; + + // Debug display. + private transient long batchCounter; + + //--------------------------------------------------------------------------- + + /** Kryo ctor. */ + protected VectorPTFOperator() { + super(); + } + + public VectorPTFOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorPTFOperator(CompilationOpContext ctx, + VectorizationContext vContext, OperatorDesc conf) throws HiveException { + this(ctx); + + LOG.info("VectorPTF constructor"); + + PTFDesc desc = (PTFDesc) conf; + this.conf = desc; + vectorDesc = (VectorPTFDesc) desc.getVectorDesc(); + vectorPTFInfo = vectorDesc.getVectorPTFInfo(); + this.vContext = vContext; + + isPartitionOrderBy = vectorPTFInfo.getIsPartitionOrderBy(); + + outputColumnMap = vectorPTFInfo.getOutputColumnMap(); + outputTypeInfos = vectorPTFInfo.getOutputTypeInfos(); + + evaluatorFunctionNames = vectorPTFInfo.getEvaluatorFunctionNames(); + evaluatorWindowFrameDefs = vectorPTFInfo.getEvaluatorWindowFrameDefs(); + evaluatorInputExprNodeDescs = vectorPTFInfo.getEvaluatorInputExprNodeDescs(); + + if (isPartitionOrderBy) { + partitionExprNodeDescs = vectorPTFInfo.getPartitionExprNodeDescs(); + } + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + if (isLogDebugEnabled) { + LOG.debug("isPartitionOrderBy " + isPartitionOrderBy); + + 
LOG.debug("outputColumnMap " + + (outputColumnMap == null ? "NULL" : Arrays.toString(outputColumnMap))); + LOG.debug("outputTypeInfos " + + (outputTypeInfos == null ? "NULL" : Arrays.toString(outputTypeInfos))); + + LOG.debug("evaluatorFunctionNames " + + (evaluatorFunctionNames == null ? "NULL" : Arrays.toString(evaluatorFunctionNames))); + LOG.debug("evaluatorInputExprNodeDescs " + + (evaluatorInputExprNodeDescs == null ? "NULL" : Arrays.toString(evaluatorInputExprNodeDescs))); + + LOG.debug("partitionExprNodeDescs " + + (partitionExprNodeDescs == null ? "NULL" : Arrays.toString(partitionExprNodeDescs))); + } + + if (LOG.isDebugEnabled()) { + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + taskName = work.getName(); + } + + // Unpack partition information only when there is an ORDER BY clause... + + if (isPartitionOrderBy) { + final int count = partitionExprNodeDescs.length; + partitionColumnMap = new int[count]; + partitionColumnVectorTypes = new Type[count]; + partitionExpressions = new VectorExpression[count]; + for (int i = 0; i < count; i++) { + ExprNodeDesc exprNodeDesc = (ExprNodeDesc) partitionExprNodeDescs[i]; + + VectorExpression inputVectorExpression = vContext.getVectorExpression(exprNodeDesc); + partitionColumnMap[i] = inputVectorExpression.getOutputColumn(); + if (!(inputVectorExpression instanceof IdentityExpression)) { + partitionExpressions[i] = inputVectorExpression; + } + + TypeInfo typeInfo = exprNodeDesc.getTypeInfo(); + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + partitionColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + } + + currentPartitionIsNull = new boolean[count]; + currentPartitionLongs = new long[count]; + currentPartitionDoubles = new double[count]; + currentPartitionByteArrays = new byte[count][]; + 
currentPartitionByteLengths = new int[count]; + currentPartitionDecimals = new HiveDecimalWritable[count]; + + if (isLogDebugEnabled) { + LOG.debug("partitionColumnMap " + Arrays.toString(partitionColumnMap)); + LOG.debug("partitionColumnVectorTypes " + Arrays.toString(partitionColumnVectorTypes)); + LOG.debug("partitionExpressions " + Arrays.toString(partitionExpressions)); + } + } + + groupBatches = new VectorPTFGroupBatches(); + final int count = evaluatorFunctionNames.length; + VectorPTFEvaluatorBase[] evaluators = new VectorPTFEvaluatorBase[count]; + + for (int i = 0; i < count; i++) { + String functionName = evaluatorFunctionNames[i]; + WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i]; + SupportedFunctionType functionType = VectorPTFInfo.supportedFunctionsMap.get(functionName); + ExprNodeDesc exprNodeDesc = (ExprNodeDesc) evaluatorInputExprNodeDescs[i]; + + final VectorExpression inputVectorExpression; + final Type columnVectorType; + if (exprNodeDesc != null) { + inputVectorExpression = vContext.getVectorExpression(exprNodeDesc); + + TypeInfo typeInfo = exprNodeDesc.getTypeInfo(); + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + } else { + inputVectorExpression = null; + columnVectorType = ColumnVector.Type.NONE; + } + + // The output* arrays start at index 0 for output aggregations. 
+ final int outputColumnNum = outputColumnMap[i]; + + VectorPTFEvaluatorBase evaluator = + VectorPTFInfo.getEvaluator(functionType, windowFrameDef, columnVectorType, inputVectorExpression, outputColumnNum); + + evaluators[i] = evaluator; + } + + groupBatches.init(evaluators); + + isFirstPartition = true; + + batchCounter = 0; + } + + @Override + public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException { + this.isLastGroupBatch = isLastGroupBatch; + } + + /** + * We are processing a batch from reduce processor that is only for one group (i.e. reducer) key. + * + * For a simple OVER (PARTITION BY column) or OVER (ORDER BY column), the reduce processor's + * group key is the partition or order by key. + * + * For an OVER (PARTITION BY column1, ORDER BY column2), the reduce-shuffle group key is + * the combination of the partition column1 and the order by column2. In this case, this method + * has to watch for changes in the partition and reset the group aggregations. + * + * The reduce processor calls setNextVectorBatchGroupStatus beforehand to tell us whether the + * batch supplied to our process method is the last batch for the group key, or not. + */ + @Override + public void process(Object row, int tag) throws HiveException { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + if (isPartitionOrderBy) { + if (isFirstPartition) { + isFirstPartition = false; + setCurrentPartition(batch); + } else if (isPartitionChanged(batch)) { + setCurrentPartition(batch); + groupBatches.resetEvaluators(); + } + } + + /* + * Evaluate the aggregation functions over the entire group batch. + */ + groupBatches.evaluateGroupBatch(batch); + + if (isLastGroupBatch) { + + /* + * Take the group aggregation values and write output columns for all rows of every batch of + * the group. As each group batch is finished being written, it is forwarded to the next + * operator. + * + * Note the last batch is always our current input batch. 
+ */ + groupBatches.fillGroupResultsAndForward(this); + if (!isPartitionOrderBy) { + groupBatches.resetEvaluators(); + } + } else { + + // The group spans a VectorizedRowBatch. Copy the relevant columns into our batch buffers, or + // to temporary storage. + groupBatches.bufferGroupBatch(batch); + } + } + + private boolean isPartitionChanged(VectorizedRowBatch batch) { + final int count = partitionColumnMap.length; + for (int i = 0; i < count; i++) { + ColumnVector colVector = batch.cols[partitionColumnMap[i]]; + + // Partition columns are repeated -- so we test element 0. + + final boolean isNull = !colVector.noNulls && colVector.isNull[0]; + final boolean currentIsNull = currentPartitionIsNull[i]; + + if (isNull != currentIsNull) { + return true; + } + + VectorExpression inputVectorExpression = partitionExpressions[i]; + if (inputVectorExpression != null) { + inputVectorExpression.evaluate(batch); + } + switch (partitionColumnVectorTypes[i]) { + case LONG: + if (currentPartitionLongs[i] != ((LongColumnVector) colVector).vector[0]) { + return true; + } + break; + case DOUBLE: + if (currentPartitionDoubles[i] != ((DoubleColumnVector) colVector).vector[0]) { + return true; + } + break; + case BYTES: + { + BytesColumnVector byteColVector = (BytesColumnVector) colVector; + byte[] bytes = byteColVector.vector[0]; + final int start = byteColVector.start[0]; + final int length = byteColVector.length[0]; + if (!StringExpr.equal(bytes, start, length, currentPartitionByteArrays[i], 0, currentPartitionByteLengths[i])) { + return true; + } + } + break; + case DECIMAL: + if (!currentPartitionDecimals[i].equals(((DecimalColumnVector) colVector).vector[0])) { + return true; + } + break; + default: + throw new RuntimeException("Unexpected column vector type " + partitionColumnVectorTypes[i]); + } + } + return false; + } + + private void setCurrentPartition(VectorizedRowBatch batch) { + final int count = partitionColumnMap.length; + for (int i = 0; i < count; i++) { + ColumnVector 
colVector = batch.cols[partitionColumnMap[i]]; + + // Partition columns are repeated -- so we test element 0. + + final boolean isNull = !colVector.noNulls && colVector.isNull[0]; + currentPartitionIsNull[i] = isNull; + + if (isNull) { + continue; + } + + switch (partitionColumnVectorTypes[i]) { + case LONG: + currentPartitionLongs[i] = ((LongColumnVector) colVector).vector[0]; + break; + case DOUBLE: + currentPartitionDoubles[i] = ((DoubleColumnVector) colVector).vector[0]; + break; + case BYTES: + { + BytesColumnVector byteColVector = (BytesColumnVector) colVector; + byte[] bytes = byteColVector.vector[0]; + final int start = byteColVector.start[0]; + final int length = byteColVector.length[0]; + if (currentPartitionByteArrays[i] == null || currentPartitionByteLengths[i] < length) { + currentPartitionByteArrays[i] = Arrays.copyOfRange(bytes, start, start + length); + } else { + System.arraycopy(bytes, start, currentPartitionByteArrays[i], 0, length); + } + currentPartitionByteLengths[i] = length; + } + break; + case DECIMAL: + if (currentPartitionDecimals[i] == null) { + currentPartitionDecimals[i] = new HiveDecimalWritable(); + } + currentPartitionDecimals[i].set(((DecimalColumnVector) colVector).vector[0]); + break; + default: + throw new RuntimeException("Unexpected column vector type " + partitionColumnVectorTypes[i]); + } + } + } + + @Override + public void forward(Object row, ObjectInspector rowInspector) throws HiveException { + super.forward(row, rowInspector); + } + + @Override + protected void closeOp(boolean abort) throws HiveException { + super.closeOp(abort); + + // We do not try to finish and flush an in-progress group because correct values require the + // last group batch. 
+ } + + /** + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + static public String getOperatorName() { + return "PTF"; + } + + @Override + public OperatorType getType() { + return OperatorType.PTF; + } + + @Override + public VectorizationContext getOuputVectorizationContext() { + return vContext; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 8e689fe..8970992 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -35,8 +35,8 @@ import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.ArrayUtils; import org.apache.calcite.util.Pair; import org.apache.commons.lang3.tuple.ImmutablePair; import org.slf4j.Logger; @@ -61,6 +61,14 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDenseRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator; @@ -95,6 +103,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc; @@ -102,6 +111,7 @@ import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; @@ -112,10 +122,14 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc; import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; +import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; +import org.apache.hadoop.hive.ql.plan.VectorPTFInfo.SupportedFunctionType; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; @@ -149,6 +163,13 @@ import 
org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo; import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef; +import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef; +import org.apache.hadoop.hive.ql.plan.ptf.PartitionDef; +import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAsin; import org.apache.hadoop.hive.ql.udf.UDFAtan; @@ -190,6 +211,8 @@ import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; +import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator; +import org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.NullStructSerDe; @@ -1633,7 +1656,7 @@ private ValidatorVectorizationContext(HiveConf hiveConf) { } @Override - protected int getInputColumnIndex(String name) { + public int getInputColumnIndex(String name) { return 0; } @@ -1792,6 +1815,9 @@ boolean validateReduceWorkOperator(Operator op) { ret = op instanceof SparkHashTableSinkOperator && validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op); break; + case PTF: + ret = validatePTFOperator((PTFOperator) op); + break; default: setOperatorNotSupported(op); ret = false; @@ -2047,6 +2073,78 @@ private boolean validateFileSinkOperator(FileSinkOperator op) { return true; } + private boolean validatePTFOperator(PTFOperator op) { + PTFDesc ptfDesc = (PTFDesc) op.getConf(); + boolean isMapSide = ptfDesc.isMapSide(); + if (isMapSide) { + 
setOperatorIssue("PTF Mapper not supported"); + return false; + } + boolean forNoop = ptfDesc.forNoop(); + if (forNoop) { + setOperatorIssue("NOOP not supported"); + return false; + } + boolean forWindowing = ptfDesc.forWindowing(); + if (!forWindowing) { + setOperatorIssue("Windowing required"); + return false; + } + PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); + boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef); + if (isWindowTableFunctionDef) { + + // As a validator, when we pass null for vContext, the output column info will return null. + VectorPTFInfo vectorPTFInfo = null; + try { + vectorPTFInfo = createVectorPTFInfo(op, ptfDesc, /* vContext */ null); + } catch (HiveException e) { + setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e)); + return false; + } + + // UNDONE: Validate outputExprNodeColumns + + boolean isPartitionOrderBy = vectorPTFInfo.getIsPartitionOrderBy(); + String[] evaluatorFunctionNames = vectorPTFInfo.getEvaluatorFunctionNames(); + final int count = evaluatorFunctionNames.length; + WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFInfo.getEvaluatorWindowFrameDefs(); + + for (int i = 0; i < count; i++) { + String functionName = evaluatorFunctionNames[i]; + SupportedFunctionType supportedFunctionType = VectorPTFInfo.supportedFunctionsMap.get(functionName); + if (supportedFunctionType == null) { + setOperatorIssue(functionName + " not in supported functions " + VectorPTFInfo.supportedFunctionNames); + return false; + } + WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i]; + if (!windowFrameDef.isStartUnbounded()) { + setOperatorIssue(functionName + " only UNBOUNDED start frame is supported"); + return false; + } + switch (windowFrameDef.getWindowType()) { + case RANGE: + if (!windowFrameDef.getEnd().isCurrentRow()) { + setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE"); + return false; + } + break; + case ROWS: + if 
(!windowFrameDef.isEndUnbounded()) { + setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type"); + return false; + } + break; + default: + throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType()); + } + } + return true; + } + setOperatorIssue("Not quite ready yet validatePTFOperator"); + return false; + } + private boolean validateExprNodeDesc(List descs, String expressionTitle) { return validateExprNodeDesc(descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); } @@ -3308,6 +3406,141 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { selectOp.getCompilationOpContext(), selectDesc, vContext, selectOp); } + private static void fillInPTFEvaluators( + List windowsFunctions, + String[] evaluatorFunctionNames, + WindowFrameDef[] evaluatorWindowFrameDefs, + ExprNodeDesc[] evaluatorInputExprNodeDescs) throws HiveException { + final int functionCount = windowsFunctions.size(); + for (int i = 0; i < functionCount; i++) { + WindowFunctionDef winFunc = windowsFunctions.get(i); + evaluatorFunctionNames[i] = winFunc.getName(); + evaluatorWindowFrameDefs[i] = winFunc.getWindowFrame(); + + List args = winFunc.getArgs(); + if (args != null) { + + // UNDONE: Just one argument? + PTFExpressionDef arg = args.get(0); + + evaluatorInputExprNodeDescs[i] = arg.getExprNode(); + } + } + } + + private static VectorPTFInfo createVectorPTFInfo(Operator ptfOp, + PTFDesc ptfDesc, VectorizationContext vContext) throws HiveException { + PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef(); + + ArrayList outputSignature = ptfOp.getSchema().getSignature(); + final int outputSize = outputSignature.size(); + + List partitionExpressions = funcDef.getPartition().getExpressions(); + final int partitionExprSize = partitionExpressions.size(); + ExprNodeDesc[] partitionExprNodeDescs = null; + + // How do we determine there is more ordering than partition? See if order expression is longer? 
+ boolean isPartitionOrderBy = false; + List orderExpressions = funcDef.getOrder().getExpressions(); + final int orderExprSize = orderExpressions.size(); + if (partitionExprSize != orderExprSize) { + // Obviously different expressions. + isPartitionOrderBy = true; + } else { + // Check each ExprNodeDesc. + for (int i = 0; i < partitionExprSize; i++) { + final ExprNodeDescEqualityWrapper partitionExprEqualityWrapper = + new ExprNodeDesc.ExprNodeDescEqualityWrapper(partitionExpressions.get(i).getExprNode()); + final ExprNodeDescEqualityWrapper orderExprEqualityWrapper = + new ExprNodeDesc.ExprNodeDescEqualityWrapper(orderExpressions.get(i).getExprNode()); + if (!partitionExprEqualityWrapper.equals(orderExprEqualityWrapper)) { + isPartitionOrderBy = true; + break; + } + } + } + + if (isPartitionOrderBy && partitionExprSize > 0) { + partitionExprNodeDescs = new ExprNodeDesc[partitionExprSize]; + for (int i = 0; i < partitionExprSize; i++) { + partitionExprNodeDescs[i] = partitionExpressions.get(i).getExprNode(); + } + } + + WindowTableFunctionDef windowTableFunctionDef = (WindowTableFunctionDef) funcDef; + List windowsFunctions = windowTableFunctionDef.getWindowFunctions(); + final int functionCount = windowsFunctions.size(); + + int[] outputColumnMap = null; + TypeInfo[] outputTypeInfos = null; + + // vContext will be null during validation... 
+ if (vContext != null) { + outputColumnMap = new int[outputSize]; + outputTypeInfos = new TypeInfo[outputSize]; + for (int i = 0; i < functionCount; i++) { + ColumnInfo colInfo = outputSignature.get(i); + TypeInfo typeInfo = colInfo.getType(); + final int outputColumnNum; + outputColumnNum = vContext.allocateScratchColumn(typeInfo); + vContext.addProjectionColumn(colInfo.getInternalName(), outputColumnNum); + outputColumnMap[i] = outputColumnNum; + outputTypeInfos[i] = typeInfo; + } + for (int i = functionCount; i < outputSize; i++) { + ColumnInfo colInfo = outputSignature.get(i); + outputColumnMap[i] = vContext.getInputColumnIndex(colInfo.getInternalName()); + outputTypeInfos[i] = colInfo.getType(); + } + } + + String[] evaluatorFunctionNames = new String[functionCount]; + WindowFrameDef[] evaluatorWindowFrameDefs = new WindowFrameDef[functionCount]; + ExprNodeDesc[] evaluatorInputExprNodeDescs = new ExprNodeDesc[functionCount]; + + fillInPTFEvaluators( + windowsFunctions, + evaluatorFunctionNames, + evaluatorWindowFrameDefs, + evaluatorInputExprNodeDescs); + + VectorPTFInfo vectorPTFInfo = new VectorPTFInfo(); + + vectorPTFInfo.setOutputColumnMap(outputColumnMap); + vectorPTFInfo.setOutputTypeInfos(outputTypeInfos); + + vectorPTFInfo.setIsPartitionOrderBy(isPartitionOrderBy); + if (isPartitionOrderBy && partitionExprSize > 0) { + vectorPTFInfo.setPartitionExprNodeDescs(partitionExprNodeDescs); + } + + vectorPTFInfo.setEvaluatorFunctionNames(evaluatorFunctionNames); + vectorPTFInfo.setEvaluatorWindowFrameDefs(evaluatorWindowFrameDefs); + vectorPTFInfo.setEvaluatorInputExprNodeDescs(evaluatorInputExprNodeDescs); + + return vectorPTFInfo; + } + + /* + * NOTE: The VectorPTFDesc has already been allocated and partially populated. 
+ */ + public static Operator vectorizePTFOperator( + Operator ptfOp, VectorizationContext vContext) + throws HiveException { + PTFDesc ptfDesc = (PTFDesc) ptfOp.getConf(); + + VectorPTFDesc vectorPTFDesc = new VectorPTFDesc(); + ptfDesc.setVectorDesc(vectorPTFDesc); + + VectorPTFInfo vectorPTFInfo = createVectorPTFInfo(ptfOp, ptfDesc, vContext); + + vectorPTFDesc.setVectorPTFInfo(vectorPTFInfo); + + Class> opClass = VectorPTFOperator.class; + return OperatorFactory.getVectorOperator( + ptfOp.getCompilationOpContext(), ptfDesc, vContext, ptfOp); + } + public Operator vectorizeOperator(Operator op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException { @@ -3482,6 +3715,10 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { isNative = true; } break; + case PTF: + vectorOp = vectorizePTFOperator(op, vContext); + isNative = true; + break; case HASHTABLESINK: { SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java index c4b49b6..75f19f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java @@ -20,16 +20,28 @@ import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase; import org.apache.hadoop.hive.ql.parse.LeadLagInfo; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import 
org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.VectorPTFInfo.SupportedFunctionType; import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; import org.apache.hadoop.hive.ql.udf.ptf.Noop; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -114,4 +126,66 @@ public Configuration getCfg() { public void setCfg(Configuration cfg) { this.cfg = cfg; } + + public class PTFOperatorExplainVectorization extends OperatorExplainVectorization { + + private final PTFDesc PTFDesc; + private final VectorPTFDesc vectorPTFDesc; + private final VectorPTFInfo vectorPTFInfo; + + private VectorizationCondition[] nativeConditions; + + public PTFOperatorExplainVectorization(PTFDesc PTFDesc, VectorDesc vectorDesc) { + // VectorPTFOperator is native vectorized. 
+ super(vectorDesc, true); + this.PTFDesc = PTFDesc; + vectorPTFDesc = (VectorPTFDesc) vectorDesc; + vectorPTFInfo = vectorPTFDesc.getVectorPTFInfo(); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "functionNames", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getFunctionNames() { + return Arrays.toString(vectorPTFInfo.getEvaluatorFunctionNames()); + } + + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "evaluatorClasses", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getEvaluatorClasses() { + String[] evaluatorFunctionNames = vectorPTFInfo.getEvaluatorFunctionNames(); + final int count = evaluatorFunctionNames.length; + WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFInfo.getEvaluatorWindowFrameDefs(); + ExprNodeDesc[] evaluatorInputExprNodeDescs = vectorPTFInfo.getEvaluatorInputExprNodeDescs(); + + ArrayList result = new ArrayList(count); + for (int i = 0; i < count; i++) { + String functionName = evaluatorFunctionNames[i]; + SupportedFunctionType functionType = VectorPTFInfo.supportedFunctionsMap.get(functionName); + WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i]; + ExprNodeDesc exprNodeDesc = (ExprNodeDesc) evaluatorInputExprNodeDescs[i]; + + // Fake. + VectorExpression inputVectorExpression = new IdentityExpression(0, "bigint"); + + TypeInfo typeInfo = exprNodeDesc.getTypeInfo(); + PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + + // Fake. 
+ final int outputColumnNum = 0; + + VectorPTFEvaluatorBase evaluator = + VectorPTFInfo.getEvaluator(functionType, windowFrameDef, columnVectorType, inputVectorExpression, outputColumnNum); + + result.add(evaluator.getClass().getSimpleName()); + } + return result.toString(); + } + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "PTF Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public PTFOperatorExplainVectorization getPTFVectorization() { + if (vectorDesc == null) { + return null; + } + return new PTFOperatorExplainVectorization(this, vectorDesc); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java new file mode 100644 index 0000000..d19bc45 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +/** + * VectorPTFDesc. + * + * Extra parameters beyond PTFDesc just for the VectorPTFOperator. + * + * We don't extend PTFDesc because the base OperatorDesc doesn't support + * clone and adding it is a lot work for little gain. 
+ */ +public class VectorPTFDesc extends AbstractVectorDesc { + + private static final long serialVersionUID = 1L; + + private VectorPTFInfo vectorPTFInfo; + + public VectorPTFDesc() { + } + + public void setVectorPTFInfo(VectorPTFInfo vectorPTFInfo) { + this.vectorPTFInfo = vectorPTFInfo; + } + + public VectorPTFInfo getVectorPTFInfo() { + return vectorPTFInfo; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFInfo.java new file mode 100644 index 0000000..7da3f43 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFInfo.java @@ -0,0 +1,294 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.TreeSet; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorCount; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalFirstValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalLastValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDecimalSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDenseRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleFirstValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleLastValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorDoubleSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongFirstValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongLastValue; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongSum; +import 
org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRowNumber; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * VectorGroupByAggregrationInfo. + * + * A convenience data structure that has information needed to vectorize reduce sink. + * + * It is created by the Vectorizer when it is determining whether it can specialize so the + * information doesn't have to be recreated again and against by the VectorPTFOperator's + * constructors and later during execution. + */ +public class VectorPTFInfo { + + private static final long serialVersionUID = 1L; + + public static enum SupportedFunctionType { + ROW_NUMBER, + RANK, + DENSE_RANK, + MIN, + MAX, + SUM, + AVG, + FIRST_VALUE, + LAST_VALUE, + COUNT + } + + public static HashMap supportedFunctionsMap = + new HashMap(); + static { + supportedFunctionsMap.put("row_number", SupportedFunctionType.ROW_NUMBER); + supportedFunctionsMap.put("rank", SupportedFunctionType.RANK); + supportedFunctionsMap.put("dense_rank", SupportedFunctionType.DENSE_RANK); + supportedFunctionsMap.put("min", SupportedFunctionType.MIN); + supportedFunctionsMap.put("max", SupportedFunctionType.MAX); + supportedFunctionsMap.put("sum", SupportedFunctionType.SUM); + supportedFunctionsMap.put("avg", SupportedFunctionType.AVG); + supportedFunctionsMap.put("first_value", SupportedFunctionType.FIRST_VALUE); + supportedFunctionsMap.put("last_value", SupportedFunctionType.LAST_VALUE); + supportedFunctionsMap.put("count", SupportedFunctionType.COUNT); + } + public static List supportedFunctionNames = new ArrayList(); + static { + TreeSet treeSet = new TreeSet(); + treeSet.addAll(supportedFunctionsMap.keySet()); + supportedFunctionNames.addAll(treeSet); + } + + private boolean isPartitionOrderBy; + + private int[] outputColumnMap; + private TypeInfo[] outputTypeInfos; + + private String[] 
evaluatorFunctionNames; + private WindowFrameDef[] evaluatorWindowFrameDefs; + private ExprNodeDesc[] evaluatorInputExprNodeDescs; + + private ExprNodeDesc[] partitionExprNodeDescs; + + public VectorPTFInfo() { + isPartitionOrderBy = false; + + outputColumnMap = null; + outputTypeInfos = null; + + evaluatorFunctionNames = null; + evaluatorInputExprNodeDescs = null; + + partitionExprNodeDescs = null; + } + + // We provide this public method to help EXPLAIN VECTORIZATION show the evaluator classes. + public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType functionType, + WindowFrameDef windowFrameDef, Type columnVectorType, VectorExpression inputVectorExpression, + int outputColumnNum) { + + VectorPTFEvaluatorBase evaluator; + switch (functionType) { + case ROW_NUMBER: + evaluator = new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case RANK: + evaluator = new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DENSE_RANK: + evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case MIN: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongMin(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case MAX: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongMax(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum); + 
break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case SUM: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongSum(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case AVG: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case FIRST_VALUE: + switch (columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case LAST_VALUE: + switch 
(columnVectorType) { + case LONG: + evaluator = new VectorPTFEvaluatorLongLastValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DOUBLE: + evaluator = new VectorPTFEvaluatorDoubleLastValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + case DECIMAL: + evaluator = new VectorPTFEvaluatorDecimalLastValue(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); + } + break; + case COUNT: + evaluator = new VectorPTFEvaluatorCount(windowFrameDef, inputVectorExpression, outputColumnNum); + break; + default: + throw new RuntimeException("Unexpected function type " + functionType); + } + return evaluator; + } + + public boolean getIsPartitionOrderBy() { + return isPartitionOrderBy; + } + + public void setIsPartitionOrderBy(boolean isPartitionOrderBy) { + this.isPartitionOrderBy = isPartitionOrderBy; + } + + public int[] getOutputColumnMap() { + return outputColumnMap; + } + + public void setOutputColumnMap(int[] outputColumnMap) { + this.outputColumnMap = outputColumnMap; + } + + public TypeInfo[] getOutputTypeInfos() { + return outputTypeInfos; + } + + public void setOutputTypeInfos(TypeInfo[] outputTypeInfos) { + this.outputTypeInfos = outputTypeInfos; + } + + public String[] getEvaluatorFunctionNames() { + return evaluatorFunctionNames; + } + + public void setEvaluatorFunctionNames(String[] evaluatorFunctionNames) { + this.evaluatorFunctionNames = evaluatorFunctionNames; + } + + public WindowFrameDef[] getEvaluatorWindowFrameDefs() { + return evaluatorWindowFrameDefs; + } + + public void setEvaluatorWindowFrameDefs(WindowFrameDef[] evaluatorWindowFrameDefs) { + this.evaluatorWindowFrameDefs = evaluatorWindowFrameDefs; + } + + public ExprNodeDesc[] getEvaluatorInputExprNodeDescs() { + return evaluatorInputExprNodeDescs; + } + + public void setEvaluatorInputExprNodeDescs(ExprNodeDesc[] 
evaluatorInputExprNodeDescs) { + this.evaluatorInputExprNodeDescs = evaluatorInputExprNodeDescs; + } + + public ExprNodeDesc[] getPartitionExprNodeDescs() { + return partitionExprNodeDescs; + } + + public void setPartitionExprNodeDescs(ExprNodeDesc[] partitionExprNodeDescs) { + this.partitionExprNodeDescs = partitionExprNodeDescs; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java index 0af878b..346abe3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/WindowFrameDef.java @@ -78,6 +78,6 @@ public int getWindowSize() { @Override public String toString() { - return start + "~" + end; + return windowType + " " + start + "~" + end; } } diff --git ql/src/test/queries/clientpositive/vector_ptf_part_simple.q ql/src/test/queries/clientpositive/vector_ptf_part_simple.q new file mode 100644 index 0000000..0d1e7b7 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_ptf_part_simple.q @@ -0,0 +1,237 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text; + +create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc; +INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text; + +select * from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) 
as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc; + +explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc; + +explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +row_number() 
over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc; + + + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc; + +explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc; + +explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) 
over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc; + + + + +create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal; + +create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc; +INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal; + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal; + +explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal; + + +explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) 
over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal; + +explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal; + +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal; + + + + +create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc; +INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal; + +explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long; + +explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long; + +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, 
+max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long; + + +explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long; + +explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long; + +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long; + diff --git ql/src/test/queries/clientpositive/vectorized_ptf.q ql/src/test/queries/clientpositive/vectorized_ptf.q index 232aa11..dbc7ca6 100644 --- ql/src/test/queries/clientpositive/vectorized_ptf.q +++ ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -43,7 +43,7 @@ insert into table part_orc select * from part_staging; --1. test1 -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -64,7 +64,7 @@ from noop(on part_orc -- 2. 
testJoinWithNoop -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -81,7 +81,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -94,7 +94,7 @@ order by p_name); -- 4. testPTFAlias -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -115,7 +115,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -138,7 +138,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -163,7 +163,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain vectorization extended +explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr @@ -178,7 +178,7 @@ order by p_name -- 8. testJoinRight -explain vectorization extended +explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -193,7 +193,7 @@ order by p_name -- 9. testNoopWithMap -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -208,7 +208,7 @@ order by p_name, p_size desc); -- 10. 
testNoopWithMapWithWindowing -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -227,7 +227,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -248,7 +248,7 @@ order by p_name) -- 12. testFunctionChain -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -269,7 +269,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -296,7 +296,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain vectorization extended +explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -323,7 +323,7 @@ order by p_name -- 15. 
testDistinctInSelectWithPTF -explain vectorization extended +explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -342,7 +342,7 @@ round(sum(p_retailprice),2) as s from part_orc group by p_mfgr, p_brand; -explain vectorization extended +explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -376,7 +376,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain vectorization extended +explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -413,7 +413,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -448,7 +448,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -483,7 +483,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -515,7 +515,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -550,7 +550,7 @@ from noopwithmap(on -- 22. 
testMultiOperatorChainWithDiffPartitionForWindow1 -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -583,7 +583,7 @@ from noop(on -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain vectorization extended +explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, diff --git ql/src/test/queries/clientpositive/windowing_windowspec.q ql/src/test/queries/clientpositive/windowing_windowspec.q index 08b7d5c..c37aed3 100644 --- ql/src/test/queries/clientpositive/windowing_windowspec.q +++ ql/src/test/queries/clientpositive/windowing_windowspec.q @@ -31,6 +31,8 @@ select s, sum(i) over(partition by ts order by s) from over10k limit 100; select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100; +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100; + select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; diff --git ql/src/test/results/clientpositive/correlationoptimizer12.q.out ql/src/test/results/clientpositive/correlationoptimizer12.q.out index 23443ee..ee9a6e7 100644 --- ql/src/test/results/clientpositive/correlationoptimizer12.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -57,7 +57,7 @@ STAGE PLANS: arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col0 (type: string), count_window_0 (type: bigint) @@ -142,7 +142,7 @@ STAGE PLANS: arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), count_window_0 (type: bigint) diff --git ql/src/test/results/clientpositive/ctas_colname.q.out ql/src/test/results/clientpositive/ctas_colname.q.out index b0cab7e..8d61c9d 100644 --- ql/src/test/results/clientpositive/ctas_colname.q.out +++ ql/src/test/results/clientpositive/ctas_colname.q.out @@ -190,7 +190,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -354,7 +354,7 @@ STAGE PLANS: arguments: _col0, 1 name: lead window function: GenericUDAFLeadEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/distinct_windowing.q.out ql/src/test/results/clientpositive/distinct_windowing.q.out index 1605a62..197687a 100644 --- ql/src/test/results/clientpositive/distinct_windowing.q.out +++ ql/src/test/results/clientpositive/distinct_windowing.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: first_value_window_0 (type: 
tinyint) @@ -208,7 +208,7 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int) @@ -330,13 +330,13 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: first_value_window_1 arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) diff --git ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out index 6be1467..2f3a8b3 100644 --- ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out +++ ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: first_value_window_0 (type: tinyint) @@ -208,7 +208,7 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int) @@ -330,13 
+330,13 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: first_value_window_1 arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) @@ -540,7 +540,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -680,7 +680,7 @@ STAGE PLANS: arguments: _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 4fc36ed..32135e4 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -110,7 +110,7 @@ STAGE PLANS: arguments: _col3 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/llap/groupby_resolution.q.out 
ql/src/test/results/clientpositive/llap/groupby_resolution.q.out index f2a6ab0..bf3d8e9 100644 --- ql/src/test/results/clientpositive/llap/groupby_resolution.q.out +++ ql/src/test/results/clientpositive/llap/groupby_resolution.q.out @@ -720,7 +720,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE Select Operator diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index df55238..ff262b2f 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -96,21 +96,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -306,7 +306,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 
29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -585,21 +585,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -765,21 +765,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -957,21 +957,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + 
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1460,7 +1460,7 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1636,21 +1636,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -1812,21 +1812,21 @@ STAGE PLANS: arguments: _col1 
name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2049,21 +2049,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2235,13 +2235,13 @@ STAGE PLANS: arguments: _col5 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window 
frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double) @@ -2453,33 +2453,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2819,7 +2819,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~CURRENT + window frame: ROWS PRECEDING(2)~CURRENT Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: 
string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double) @@ -3041,21 +3041,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -3094,7 +3094,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(5)~CURRENT + window frame: RANGE PRECEDING(5)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) @@ -3131,28 +3131,28 @@ STAGE PLANS: arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: cume_dist_window_3 arguments: _col3, _col2 name: 
cume_dist window function: GenericUDAFCumeDistEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) @@ -3492,21 +3492,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -3763,21 +3763,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: 
_col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4009,21 +4009,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4293,21 +4293,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -4558,21 +4558,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) @@ -4810,21 +4810,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) 
isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/ptf_streaming.q.out ql/src/test/results/clientpositive/llap/ptf_streaming.q.out index 6013c11..18074eb 100644 --- ql/src/test/results/clientpositive/llap/ptf_streaming.q.out +++ ql/src/test/results/clientpositive/llap/ptf_streaming.q.out @@ -96,21 +96,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -306,7 +306,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 29 Data size: 
6467 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -632,7 +632,7 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -808,21 +808,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1045,21 +1045,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS 
PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1284,21 +1284,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1523,21 +1523,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), sum_window_2 (type: double) @@ -1743,33 +1743,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2022,21 +2022,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - 
window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -2293,21 +2293,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -2547,21 +2547,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/subquery_in.q.out ql/src/test/results/clientpositive/llap/subquery_in.q.out index 1cbbe8f..23f521a 100644 --- ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -346,7 +346,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -526,7 +526,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git ql/src/test/results/clientpositive/llap/subquery_notin.q.out ql/src/test/results/clientpositive/llap/subquery_notin.q.out index f1efb64..c0d11b1 100644 --- ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -450,7 +450,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -512,7 +512,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) 
isPivotResult: true Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -761,7 +761,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -822,7 +822,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1061,7 +1061,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1218,7 +1218,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1283,7 +1283,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index f6dc397..959b854 100644 --- ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ 
ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1349,7 +1349,7 @@ STAGE PLANS: arguments: _col5 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: first_value_window_0 (type: int) @@ -1417,7 +1417,7 @@ STAGE PLANS: arguments: _col5 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: first_value_window_0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index 678db83..5d0b23c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) @@ -113,7 +113,7 @@ STAGE PLANS: arguments: _col3 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out new file mode 100644 index 0000000..2349027 --- /dev/null +++ 
ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -0,0 +1,3367 @@ +PREHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM 
vector_ptf_part_simple_text +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text +POSTHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_mfgr SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_name SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_retailprice SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_retailprice, type:double, comment:null), ] +vector_ptf_part_simple_text.p_mfgr vector_ptf_part_simple_text.p_name vector_ptf_part_simple_text.p_retailprice +PREHOOK: query: select * from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +vector_ptf_part_simple_orc.p_mfgr vector_ptf_part_simple_orc.p_name vector_ptf_part_simple_orc.p_retailprice +Manufacturer#2 almond aquamarine rose maroon antique 900.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#5 almond antique blue firebrick mint 1789.69 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 +Manufacturer#1 almond antique chartreuse lavender 
yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique forest lavender goldenrod NULL +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 +Manufacturer#4 almond antique violet mint lemon 1375.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#5 almond antique sky peru orange 1788.73 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique chartreuse khaki white 99.68 +Manufacturer#4 almond antique gainsboro frosted violet NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique olive coral navajo 1337.29 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 +Manufacturer#3 almond antique misty red olive 1922.98 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 +Manufacturer#4 almond aquamarine floral ivory bisque NULL +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 +Manufacturer#3 almond antique metallic orange dim 55.39 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) 
over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double, double, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col0 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + functionNames: [row_number, rank, dense_rank, first_value, last_value, count] + native: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_retailprice (type: double) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### 
A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col0 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), 
count_window_5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string:string:double:int:int:int:double:double:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by p_mfgr) as c +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv, +count(p_retailprice) over(partition by 
p_mfgr) as c +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c +Manufacturer#1 almond aquamarine burnished black steel 1414.42 1 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 2 1 1 1414.42 1753.76 11 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 3 1 1 1414.42 1753.76 11 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 4 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique burnished rose metallic 1173.15 6 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique burnished rose metallic 1173.15 8 1 1 1414.42 1753.76 11 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 10 1 1 1414.42 1753.76 11 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 1 1 1414.42 1753.76 11 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 12 1 1 1414.42 1753.76 11 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 1 1 900.66 2031.98 8 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 2 1 1 900.66 2031.98 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 1 1 900.66 2031.98 8 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 4 1 1 900.66 2031.98 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5 1 1 900.66 2031.98 8 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 6 1 1 900.66 2031.98 8 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 7 1 1 900.66 2031.98 8 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 8 1 1 900.66 2031.98 8 +Manufacturer#3 almond antique 
forest lavender goldenrod NULL 1 1 1 NULL 590.27 7 +Manufacturer#3 almond antique chartreuse khaki white 99.68 2 1 1 NULL 590.27 7 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3 1 1 NULL 590.27 7 +Manufacturer#3 almond antique metallic orange dim 55.39 4 1 1 NULL 590.27 7 +Manufacturer#3 almond antique olive coral navajo 1337.29 5 1 1 NULL 590.27 7 +Manufacturer#3 almond antique misty red olive 1922.98 6 1 1 NULL 590.27 7 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 7 1 1 NULL 590.27 7 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 8 1 1 NULL 590.27 7 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 1 1 1290.35 1375.42 4 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 2 1 1 1290.35 1375.42 4 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 3 1 1 1290.35 1375.42 4 +Manufacturer#4 almond antique gainsboro frosted violet NULL 4 1 1 1290.35 1375.42 4 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5 1 1 1290.35 1375.42 4 +Manufacturer#4 almond antique violet mint lemon 1375.42 6 1 1 1290.35 1375.42 4 +Manufacturer#5 almond antique medium spring khaki 1611.66 1 1 1 1611.66 1018.1 6 +Manufacturer#5 almond antique blue firebrick mint 1789.69 2 1 1 1611.66 1018.1 6 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 3 1 1 1611.66 1018.1 6 +Manufacturer#5 almond antique medium spring khaki 1611.66 4 1 1 1611.66 1018.1 6 +Manufacturer#5 almond antique sky peru orange 1788.73 5 1 1 1611.66 1018.1 6 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 6 1 1 1611.66 1018.1 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) 
over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + 
LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double, double, bigint + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + 
window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + functionNames: [row_number, rank, dense_rank, first_value, last_value, count] + native: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + 
value expressions: p_retailprice (type: double) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Reducer 2 + 
Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: count_window_5 + arguments: _col2 + name: count + window function: GenericUDAFCountEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double), count_window_5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types string:string:double:int:int:int:double:double:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, 
+row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv, +count(p_retailprice) over(partition by p_mfgr order by p_name) as c +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv c +Manufacturer#1 almond antique burnished rose metallic 1173.15 1 1 1 1173.15 1173.15 2 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 2 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 6 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 7 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 8 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 11 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 11 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 11 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 11 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 1 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 4 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 
1690.68 1800.7 4 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 5 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 7 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 7 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 8 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 1 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 2 2 99.68 1190.27 4 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 2 2 99.68 1190.27 4 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 1190.27 4 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 5 2 2 99.68 1190.27 4 +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 5 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 6 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 7 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL 0 +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 1 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 2 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 2 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 3 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 4 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 1 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 3 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 3 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 4 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 5 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 
1464.48 6 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum, VectorPTFEvaluatorDoubleMin, VectorPTFEvaluatorDoubleMax, VectorPTFEvaluatorDoubleAvg] + functionNames: [sum, min, max, avg] + native: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from 
vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_retailprice (type: double) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + 
arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:double:double:double:double:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) 
over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond aquamarine burnished black steel 1414.42 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.329999999998 
1173.15 1753.76 1570.5754545454545 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.329999999998 1173.15 1753.76 1570.5754545454545 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique chartreuse khaki white 99.68 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique metallic orange dim 55.39 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique misty red olive 1922.98 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.95 1206.26 1844.92 1429.2375 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5716.95 1206.26 1844.92 1429.2375 
+Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 5716.95 1206.26 1844.92 1429.2375 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.95 1206.26 1844.92 1429.2375 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.95 1206.26 1844.92 1429.2375 +Manufacturer#4 almond antique violet mint lemon 1375.42 5716.95 1206.26 1844.92 1429.2375 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique blue firebrick mint 1789.69 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique sky peru orange 1788.73 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + 
partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDoubleSum, VectorPTFEvaluatorDoubleMin, VectorPTFEvaluatorDoubleMax, VectorPTFEvaluatorDoubleAvg] + functionNames: [sum, min, max, avg] + native: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: 
double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic 
stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_retailprice (type: double) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:double:double:double:double:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A 
masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond 
aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 
1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY 
+PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_retailprice SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +vector_ptf_part_simple_text_decimal.p_mfgr vector_ptf_part_simple_text_decimal.p_name vector_ptf_part_simple_text_decimal.p_retailprice +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: 
KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum, VectorPTFEvaluatorDecimalMin, VectorPTFEvaluatorDecimalMax, VectorPTFEvaluatorDecimalAvg] + functionNames: [sum, min, max, avg] + native: true + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan 
+ alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_decimal + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 
+ serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_decimal + name: default.vector_ptf_part_simple_orc_decimal + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_decimal [vector_ptf_part_simple_orc_decimal] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18) + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, 
+max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 
1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond aquamarine rose maroon antique 900.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 
+Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 
1429.237500000000000000 +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) 
as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(38,18), decimal(38,18), decimal(38,18), decimal(38,18) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDecimalSum, VectorPTFEvaluatorDecimalMin, VectorPTFEvaluatorDecimalMax, 
VectorPTFEvaluatorDecimalAvg] + functionNames: [sum, min, max, avg] + native: true + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: 
type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_retailprice (type: decimal(38,18)) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_decimal + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_decimal + name: default.vector_ptf_part_simple_orc_decimal + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_decimal [vector_ptf_part_simple_orc_decimal] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: 
GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18) + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) 
over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 10963.930000000000000000 1173.150000000000000000 1753.760000000000000000 1566.275714285714285714 +Manufacturer#1 
almond aquamarine burnished black steel 1414.420000000000000000 12378.350000000000000000 1173.150000000000000000 1753.760000000000000000 1547.293750000000000000 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 9124.760000000000000000 1690.680000000000000000 2031.980000000000000000 1824.952000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 900.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 
11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 3125.880000000000000000 55.390000000000000000 1190.270000000000000000 625.176000000000000000 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 5048.860000000000000000 55.390000000000000000 1922.980000000000000000 841.476666666666666667 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 
1290.840000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 4426.600000000000000000 1206.260000000000000000 1844.920000000000000000 1475.533333333333333333 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 6801.740000000000000000 1611.660000000000000000 1789.690000000000000000 1700.435000000000000000 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 7819.840000000000000000 1018.100000000000000000 1789.690000000000000000 1563.968000000000000000 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_bigint EXPRESSION [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +p_mfgr p_name _c2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_bigint (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false 
+ vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum, VectorPTFEvaluatorLongMin, VectorPTFEvaluatorLongMax, VectorPTFEvaluatorLongAvg] + functionNames: [sum, min, max, avg] + native: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE 
Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_bigint (type: bigint) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_long + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_long + name: default.vector_ptf_part_simple_orc_long + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_long [vector_ptf_part_simple_orc_long] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), 
avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:bigint:bigint:bigint:bigint:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond aquamarine burnished black steel 141442 
1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet chocolate turquoise 169068 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique forest lavender goldenrod NULL 
638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique chartreuse khaki white 9968 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique metallic orange dim 5539 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique misty red olive 192298 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 59027 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 571695 120626 184492 142923.75 +Manufacturer#4 almond antique gainsboro frosted violet NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond antique violet mint lemon 137542 571695 120626 184492 142923.75 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique blue firebrick mint 178969 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique sky peru orange 178873 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 928432 101810 178969 154738.66666666666 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, 
+min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_bigint (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, double + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + 
alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorLongSum, VectorPTFEvaluatorLongMin, VectorPTFEvaluatorLongMax, VectorPTFEvaluatorLongAvg] + functionNames: [sum, min, max, avg] + native: true + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY 
+POSTHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_bigint (type: bigint) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_long + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_long + name: default.vector_ptf_part_simple_orc_long + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_long [vector_ptf_part_simple_orc_long] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: 
min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:bigint:bigint:bigint:bigint:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) 
over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1096393 117315 175376 156627.57142857142 +Manufacturer#1 almond aquamarine burnished black steel 141442 1237835 117315 175376 154729.375 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 
163266 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond antique violet chocolate turquoise 169068 169068 169068 169068 169068.0 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 912476 169068 203198 182495.2 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique chartreuse khaki white 9968 9968 9968 9968 9968.0 +Manufacturer#3 almond antique forest lavender goldenrod 59027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod NULL 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique metallic orange dim 5539 312588 5539 119027 62517.6 +Manufacturer#3 almond antique misty red olive 192298 504886 5539 192298 84147.66666666667 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 137542 137542 137542 137542 137542.0 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 442660 120626 184492 147553.33333333334 +Manufacturer#4 almond azure 
aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#5 almond antique blue firebrick mint 178969 178969 178969 178969 178969.0 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique sky peru orange 178873 680174 161166 178969 170043.5 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 781984 101810 178969 156396.8 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 894c529..e64a4e9 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ] POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ] POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ] -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +114,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -146,16 +146,19 @@ 
STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -166,64 +169,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - 
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -246,20 +202,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 
(type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -284,21 +236,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -306,26 +258,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here 
#### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -381,14 +318,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j distribute by j.p_mfgr sort by j.p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -418,20 +355,26 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) 
-> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -442,75 +385,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here 
#### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: 
p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -521,60 +424,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns 
p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p2] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -583,24 +439,19 @@ STAGE PLANS: 0 p_partkey (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5 - Position of Big Table: 0 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported 
vectorized: false Reduce Operator Tree: Select Operator @@ -623,20 +474,16 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -661,7 +508,7 @@ STAGE PLANS: arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -670,26 +517,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -741,13 +573,13 @@ Manufacturer#5 almond antique medium spring khaki 6 -25 Manufacturer#5 almond antique sky peru orange 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -774,16 +606,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -794,64 +629,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A 
masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated 
Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -878,26 +666,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -945,7 +718,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond antique sky peru orange 2 Manufacturer#5 almond aquamarine dodger light gainsboro 46 Manufacturer#5 almond azure blanched chiffon midnight 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr 
order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -955,7 +728,7 @@ from noop(on part_orc order by p_name ) abc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -987,16 +760,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1007,64 +783,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns 
p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: 
enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1087,20 +816,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -1125,21 +850,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), 
_col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -1147,26 +872,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1222,7 +932,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1232,7 +942,7 @@ from noop(on part_orc order by p_name ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1264,16 +974,19 @@ STAGE PLANS: 
TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1284,64 +997,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1364,20 +1030,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null 
sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1402,21 +1064,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1425,26 +1087,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1500,7 +1147,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1511,7 +1158,7 @@ from noop(on part_orc ) group by p_mfgr, p_name, p_size PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -1544,16 +1191,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1564,64 +1214,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked 
pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -1653,19 +1256,15 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, 
first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Group By Operator @@ -1691,21 +1290,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: lag_window_2 arguments: _col2, 1, _col2 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1714,26 +1313,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1790,14 +1374,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25 
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.* from noop(on part_orc partition by p_mfgr @@ -1826,16 +1410,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1846,75 +1433,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - 
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + 
dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 4 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1925,64 +1472,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - 
columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2004,21 +1504,16 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -2027,30 +1522,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Position of Big Table: 0 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2102,14 +1581,14 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr order by p_name ) abc on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -2138,19 +1617,25 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2161,72 +1646,29 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 
16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 3 Map Operator Tree: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2237,60 +1679,13 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### 
A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated 
Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -2299,7 +1694,6 @@ STAGE PLANS: 0 p_partkey (type: int) 1 _col0 (type: int) outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Position of Big Table: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) @@ -2307,33 +1701,17 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2355,18 +1733,14 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: true Stage: Stage-0 Fetch Operator @@ -2418,14 +1792,14 @@ POSTHOOK: Input: default@part_orc 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc partition by p_mfgr order by p_name, p_size desc) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from 
noopwithmap(on part_orc @@ -2454,7 +1828,6 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false PTF Operator Function definitions: Input definition @@ -2473,12 +1846,9 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int) - null sort order: aaz sort order: ++- Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Execution mode: llap LLAP IO: all inputs Map Vectorization: @@ -2487,64 +1857,12 @@ STAGE PLANS: inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: PTF Operator (PTF) not supported vectorized: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2568,24 +1886,31 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - null sort order: aaz sort order: ++- Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Reducer 3 - Execution mode: llap - Needs Tagging: false + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - 
notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2605,35 +1930,32 @@ STAGE PLANS: arguments: _col1, _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionNames: [rank] + native: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2683,7 +2005,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 Manufacturer#5 almond azure blanched chiffon midnight 23 5 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2692,7 +2014,7 @@ from noopwithmap(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -2723,7 +2045,6 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false PTF Operator Function definitions: Input definition @@ -2742,13 +2063,10 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: 
llap LLAP IO: all inputs Map Vectorization: @@ -2757,64 +2075,12 @@ STAGE PLANS: inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: PTF Operator (PTF) not supported vectorized: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -2838,20 +2104,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2876,21 +2138,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS 
PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -2898,26 +2160,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2971,7 +2218,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by 
p_name) as dr, @@ -2980,7 +2227,7 @@ from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3011,16 +2258,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3031,64 +2281,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - 
columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3111,20 +2314,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -3149,21 +2348,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), 
dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -3171,26 +2370,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3244,7 +2428,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3254,7 +2438,7 @@ partition by p_mfgr order by p_mfgr, p_name ))) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -3287,16 +2471,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data 
size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3307,64 +2494,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - 
totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3410,20 +2550,16 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num 
rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3454,20 +2590,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -3492,21 +2624,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: 
GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -3514,26 +2646,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3589,7 +2706,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3602,7 +2719,7 @@ order by p_name) window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) ) sub1 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended 
+POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -3637,16 +2754,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3657,64 +2777,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, 
string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -3737,20 +2810,16 @@ STAGE PLANS: 
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported vectorized: false Reduce Operator Tree: Select Operator @@ -3775,13 +2844,13 @@ STAGE PLANS: arguments: _col5 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_1 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double) @@ -3789,26 +2858,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns 
_col0,_col1,_col2,_col3 - columns.types string:string:bigint:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -3870,7 +2924,7 @@ Manufacturer#5 almond antique medium spring khaki 2 6208.18 Manufacturer#5 almond antique sky peru orange 3 7672.66 Manufacturer#5 almond aquamarine dodger light gainsboro 4 5882.97 Manufacturer#5 almond azure blanched chiffon midnight 5 4271.31 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3882,7 +2936,7 @@ partition by p_mfgr order by p_name ) abc join part_orc p1 on abc.p_partkey = p1.p_partkey PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -3917,16 +2971,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3937,75 +2994,35 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked 
pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0, 1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Map 5 Map Operator Tree: TableScan alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Filter Operator - isSamplingPred: false + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_partkey (type: int) - null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true Execution 
mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4016,64 +3033,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [p1] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [0] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4095,21 +3065,16 @@ STAGE PLANS: raw input shape: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Filter Operator - isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) - null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Merge Join Operator condition map: @@ -4118,24 +3083,19 @@ STAGE PLANS: 0 _col0 (type: int) 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5, _col7 - Position of Big Table: 0 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort 
order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -4160,33 +3120,33 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: count_window_2 arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: sum_window_3 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT window function definition alias: lag_window_4 arguments: _col5, 1, _col5 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -4195,26 +3155,11 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types string:string:int:int:bigint:double:double:int:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4276,13 +3221,13 @@ Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr order by p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -4310,16 +3255,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ 
Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4330,64 +3278,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - 
column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4419,15 +3320,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true Reducer 3 Execution mode: vectorized, llap - Needs Tagging: false Reduce Vectorization: enabled: 
true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true @@ -4435,34 +3332,32 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4530,7 +3425,7 @@ POSTHOOK: Output: default@mfgr_price_view POSTHOOK: Lineage: mfgr_price_view.p_brand SIMPLE [(part_orc)part_orc.FieldSchema(name:p_brand, type:string, comment:null), ] POSTHOOK: Lineage: mfgr_price_view.p_mfgr SIMPLE [(part_orc)part_orc.FieldSchema(name:p_mfgr, type:string, comment:null), ] POSTHOOK: 
Lineage: mfgr_price_view.s EXPRESSION [(part_orc)part_orc.FieldSchema(name:p_retailprice, type:double, comment:null), ] -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4538,7 +3433,7 @@ partition by p_mfgr order by p_mfgr) window w1 as ( partition by p_mfgr order by p_brand rows between 2 preceding and current row) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -4568,26 +3463,40 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Select Operator expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double) outputColumnNames: p_mfgr, p_brand, p_retailprice + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 3, 7] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(p_retailprice) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 7) -> double + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 2, col 3 + native: false + projectedOutputColumns: [0] keys: p_mfgr (type: string), p_brand (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col2 (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4598,64 +3507,17 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types 
int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [2, 3, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Group By Operator @@ -4684,20 +3546,16 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col2 (type: double) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + 
notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported vectorized: false Reduce Operator Tree: Select Operator @@ -4722,7 +3580,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(2)~CURRENT + window frame: ROWS PRECEDING(2)~CURRENT Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double) @@ -4730,26 +3588,11 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:double:double - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -4837,7 +3680,7 @@ fv1 INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@part_5 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -4853,7 +3696,7 @@ cume_dist() over (distribute by p_mfgr sort by p_mfgr, p_name) as cud, first_value(p_size, true) over w1 as fv1 window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) PREHOOK: 
type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail from noop(on part_orc partition by p_mfgr order by p_name) @@ -4897,16 +3740,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4917,64 +3763,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl 
struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5, 7] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select 
Operator @@ -4997,29 +3796,22 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int), _col7 (type: double) - auto parallelism: true Reduce Output Operator key expressions: _col2 (type: string), _col5 (type: int) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -5044,21 +3836,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col7 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) @@ -5066,43 +3858,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum only UNBOUNDED start frame is supported vectorized: false Reduce Operator Tree: Select Operator @@ -5127,7 +3894,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(5)~CURRENT + window frame: RANGE PRECEDING(5)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic 
stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), sum_window_0 (type: bigint) @@ -5135,20 +3902,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: sum_window_0 (type: bigint), _col5 (type: int) - auto parallelism: true Reducer 5 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -5173,28 +3936,28 @@ STAGE PLANS: arguments: _col3, _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_2 arguments: _col3, _col2 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: cume_dist_window_3 arguments: _col3, _col2 name: cume_dist window function: GenericUDAFCumeDistEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: first_value_window_4 arguments: _col6, true name: first_value window function: GenericUDAFFirstValueEvaluator 
- window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) @@ -5202,36 +3965,12 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -5240,65 +3979,27 @@ STAGE PLANS: Move Operator tables: replace: true -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double -#### A masked pattern was here #### - name default.part_4 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 Stage: Stage-4 Stats-Aggr Operator -#### A masked pattern was here #### Stage: Stage-1 Move Operator tables: replace: true -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int -#### A masked pattern was here #### - name default.part_5 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 Stage: Stage-5 Stats-Aggr Operator -#### A masked pattern was here #### PREHOOK: query: from noop(on part_orc partition by p_mfgr @@ -5418,7 +4119,7 @@ Manufacturer#5 almond antique medium spring khaki 6 8 2 2 0.4 31 Manufacturer#5 almond antique sky peru orange 2 2 3 3 0.6 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 4 4 0.8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 5 5 1.0 2 
-PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5435,7 +4136,7 @@ from noop(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -5475,16 +4176,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string) - null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_name (type: string), p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5495,64 +4199,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - 
bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: 
llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5605,20 +4262,16 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5649,20 +4302,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce 
Operator Tree: Select Operator @@ -5687,21 +4336,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -5709,26 +4358,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -5798,7 +4432,7 @@ Manufacturer#5 almond antique 
medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5815,7 +4449,7 @@ from noop(on partition by p_mfgr order by p_mfgr ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -5856,16 +4490,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string) - null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_name (type: string), p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5876,64 +4513,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - 
Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + 
includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5963,20 +4553,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -5999,20 +4585,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -6035,20 +4617,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 5 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -6073,21 +4651,21 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), 
dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -6095,26 +4673,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6184,7 +4747,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6199,7 +4762,7 @@ from noop(on partition by p_mfgr order by p_mfgr)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -6237,16 +4800,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: 
COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6257,64 +4823,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern 
was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -6344,20 +4863,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 
Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -6387,25 +4902,32 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 - Execution mode: llap - Needs Tagging: false + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function 
definitions: @@ -6425,48 +4947,45 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLongSum] + functionNames: [rank, dense_rank, sum] + native: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3, 4, 2, 5] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - 
escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6532,7 +5051,7 @@ Manufacturer#5 almond antique medium spring khaki 2 2 6 37 Manufacturer#5 almond antique sky peru orange 3 3 2 39 Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 46 85 Manufacturer#5 almond azure blanched chiffon midnight 5 5 23 108 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6549,7 +5068,7 @@ from noopwithmap(on partition by p_mfgr,p_name order by p_mfgr,p_name) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -6590,16 +5109,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 
Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -6610,64 +5132,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, 
i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -6697,20 +5172,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -6749,20 +5220,16 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: 
++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -6786,20 +5253,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 5 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -6824,21 +5287,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window 
function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) @@ -6846,26 +5309,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types string:string:int:int:int:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -6935,7 +5383,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 -PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6951,7 +5399,7 @@ from noop(on order by p_mfgr )) 
PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -6990,16 +5438,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7010,64 +5461,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked 
pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported 
+ notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -7113,20 +5517,16 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string) - null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col1 (type: string), _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -7157,20 +5557,16 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -7195,21 +5591,21 @@ STAGE PLANS: arguments: _col2, _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true 
window function definition alias: dense_rank_window_1 arguments: _col2, _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) @@ -7217,26 +5613,11 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -7304,7 +5685,7 @@ Manufacturer#5 almond antique medium spring khaki 1 1 6 6 6 Manufacturer#5 almond antique sky peru orange 1 1 2 2 2 Manufacturer#5 almond aquamarine dodger light gainsboro 1 1 46 46 46 Manufacturer#5 almond azure blanched chiffon midnight 1 1 23 23 23 
-PREHOOK: query: explain vectorization extended +PREHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -7318,7 +5699,7 @@ from noopwithmap(on order by p_mfgr, p_name) )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization extended +POSTHOOK: query: explain vectorization detail select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -7355,16 +5736,19 @@ STAGE PLANS: TableScan alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: p_size (type: int) - auto parallelism: true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7375,64 +5759,17 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: part_orc - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment - columns.comments - columns.types int:string:string:string:string:int:string:double:string -#### A masked pattern was here #### - name default.part_orc - numFiles 1 - numRows 26 - rawDataSize 16042 - serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 2689 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_orc - name: default.part_orc - Truncated Path -> Alias: - /part_orc [part_orc] + rowBatchContext: + dataColumnCount: 9 + includeColumns: [1, 2, 5] + dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string + 
partitionColumnCount: 0 Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -7478,20 +5815,16 @@ STAGE PLANS: Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported + notVectorizedReason: PTF operator: NOOP not supported vectorized: false Reduce Operator Tree: Select Operator @@ -7515,25 +5848,32 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) - null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 value expressions: _col5 (type: int) - auto parallelism: true Reducer 4 - Execution mode: llap - Needs Tagging: false + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Operator (PTF) not supported - vectorized: false + groupByVectorOutput: true + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -7553,48 +5893,45 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: dense_rank_window_1 arguments: _col1 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true window function definition alias: sum_window_2 arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorLongSum] + functionNames: [rank, dense_rank, sum] + native: true Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [0, 1, 3, 4, 2, 5, 5] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 - columns.types string:string:int:int:int:bigint:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/windowing.q.out ql/src/test/results/clientpositive/llap/windowing.q.out index 468b67e..713fc3b 100644 --- ql/src/test/results/clientpositive/llap/windowing.q.out +++ ql/src/test/results/clientpositive/llap/windowing.q.out @@ -1868,7 +1868,7 @@ STAGE PLANS: arguments: _col5 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(2)~FOLLOWING(2) + window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), sum_window_0 (type: bigint) diff --git ql/src/test/results/clientpositive/pcs.q.out ql/src/test/results/clientpositive/pcs.q.out index dc2a476..af5d11a 100644 --- ql/src/test/results/clientpositive/pcs.q.out +++ ql/src/test/results/clientpositive/pcs.q.out @@ -999,7 +999,7 @@ STAGE PLANS: 
arguments: _col0 name: lag window function: GenericUDAFLagEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/ppd_windowing1.q.out ql/src/test/results/clientpositive/ppd_windowing1.q.out index 68b1c0e..c072869 100644 --- ql/src/test/results/clientpositive/ppd_windowing1.q.out +++ ql/src/test/results/clientpositive/ppd_windowing1.q.out @@ -44,7 +44,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -110,7 +110,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -176,7 +176,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -242,7 +242,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -308,7 +308,7 
@@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(_col0) + 2) (type: int), sum_window_0 (type: double) @@ -374,7 +374,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -440,7 +440,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -506,7 +506,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -572,7 +572,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -638,7 +638,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num 
rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(_col0) + 2) (type: int), sum_window_0 (type: double) @@ -705,7 +705,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -751,7 +751,7 @@ STAGE PLANS: arguments: _col1 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double) @@ -818,7 +818,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -864,7 +864,7 @@ STAGE PLANS: arguments: _col1 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double) @@ -931,7 +931,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ 
-977,7 +977,7 @@ STAGE PLANS: arguments: _col1 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double) @@ -1044,7 +1044,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -1090,7 +1090,7 @@ STAGE PLANS: arguments: _col1 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(_col1) + 2) (type: int), _col0 (type: double), sum_window_1 (type: double) @@ -1157,7 +1157,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1203,7 +1203,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double) @@ -1270,7 +1270,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - 
window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1316,7 +1316,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double) @@ -1383,7 +1383,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1429,7 +1429,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), sum_window_1 (type: double) @@ -1496,7 +1496,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1542,7 +1542,7 @@ STAGE PLANS: arguments: _col2 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) 
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToInteger(_col1) + 2) (type: int), _col0 (type: double), sum_window_1 (type: double) @@ -1606,7 +1606,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1652,7 +1652,7 @@ STAGE PLANS: arguments: _col2 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: double), avg_window_1 (type: double) @@ -1719,7 +1719,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -1786,7 +1786,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), sum_window_0 (type: double) @@ -1852,7 +1852,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1919,7 +1919,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) @@ -1986,7 +1986,7 @@ STAGE PLANS: arguments: _col0 name: sum window function: GenericUDAFSumDouble - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), sum_window_0 (type: double) diff --git ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out index bdb2199..b69c230 100644 --- ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out +++ ql/src/test/results/clientpositive/ptfgroupbyjoin.q.out @@ -247,7 +247,7 @@ STAGE PLANS: alias: row_number_window_0 name: row_number window function: GenericUDAFRowNumberEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/quotedid_basic.q.out ql/src/test/results/clientpositive/quotedid_basic.q.out index 8897ae9..83ed387 100644 --- ql/src/test/results/clientpositive/quotedid_basic.q.out +++ ql/src/test/results/clientpositive/quotedid_basic.q.out @@ -217,7 +217,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: NONE Select Operator @@ -319,7 +319,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/semijoin2.q.out ql/src/test/results/clientpositive/semijoin2.q.out index 757341a..d6a0b90 100644 --- ql/src/test/results/clientpositive/semijoin2.q.out +++ ql/src/test/results/clientpositive/semijoin2.q.out @@ -163,7 +163,7 @@ STAGE PLANS: arguments: COALESCE((- 973),(- 684),515) name: LEAD window function: GenericUDAFLeadEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -210,7 +210,7 @@ STAGE PLANS: arguments: COALESCE(62,(- 380),(- 435)) name: SUM window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~FOLLOWING(48) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(48) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: COALESCE(498,_col0,524) (type: int), (_col99 + _col17) (type: int), floor(_col22) (type: bigint), COALESCE(SUM_window_1,704) (type: bigint) diff --git ql/src/test/results/clientpositive/semijoin4.q.out ql/src/test/results/clientpositive/semijoin4.q.out index d6117ed..1cf8c96 100644 --- ql/src/test/results/clientpositive/semijoin4.q.out +++ ql/src/test/results/clientpositive/semijoin4.q.out @@ -175,7 +175,7 @@ STAGE PLANS: arguments: -973 name: LEAD window function: GenericUDAFLeadEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git 
ql/src/test/results/clientpositive/semijoin5.q.out ql/src/test/results/clientpositive/semijoin5.q.out index fd8e372..a1b0ab8 100644 --- ql/src/test/results/clientpositive/semijoin5.q.out +++ ql/src/test/results/clientpositive/semijoin5.q.out @@ -172,7 +172,7 @@ STAGE PLANS: arguments: -973 name: LEAD window function: GenericUDAFLeadEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator @@ -219,7 +219,7 @@ STAGE PLANS: arguments: 62 name: sum window function: GenericUDAFSumLong - window frame: PRECEDING(MAX)~FOLLOWING(48) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(48) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: COALESCE(498,_col0,524) (type: int), (_col8 + UDFToInteger(_col6)) (type: int), floor(_col4) (type: bigint), COALESCE(sum_window_1,704) (type: bigint) diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index 7473928..25ed36a 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -1808,7 +1808,7 @@ STAGE PLANS: arguments: _col1 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: first_value_window_0 is not null (type: boolean) diff --git ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 94210fd..4d7a7da 100644 --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -340,7 +340,7 @@ STAGE PLANS: 
arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -506,7 +506,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git ql/src/test/results/clientpositive/union_remove_6_subq.q.out ql/src/test/results/clientpositive/union_remove_6_subq.q.out index 0bd00c9..dbe2bd9 100644 --- ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -525,7 +525,7 @@ STAGE PLANS: arguments: _col1 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: ROWS PRECEDING(MAX)~CURRENT Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), avg_window_0 (type: double) diff --git ql/src/test/results/clientpositive/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/vector_ptf_part_simple.q.out new file mode 100644 index 0000000..87a09b5 --- /dev/null +++ ql/src/test/results/clientpositive/vector_ptf_part_simple.q.out @@ -0,0 +1,3005 @@ +PREHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: create table vector_ptf_part_simple_text(p_mfgr string, p_name string, p_retailprice double) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + 
STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text +PREHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: create table vector_ptf_part_simple_orc(p_mfgr string, p_name string, p_retailprice double) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text +PREHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc SELECT * FROM vector_ptf_part_simple_text +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text +POSTHOOK: Output: default@vector_ptf_part_simple_orc +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_mfgr SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_name SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc.p_retailprice 
SIMPLE [(vector_ptf_part_simple_text)vector_ptf_part_simple_text.FieldSchema(name:p_retailprice, type:double, comment:null), ] +vector_ptf_part_simple_text.p_mfgr vector_ptf_part_simple_text.p_name vector_ptf_part_simple_text.p_retailprice +PREHOOK: query: select * from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +vector_ptf_part_simple_orc.p_mfgr vector_ptf_part_simple_orc.p_name vector_ptf_part_simple_orc.p_retailprice +Manufacturer#2 almond aquamarine rose maroon antique 900.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#5 almond antique blue firebrick mint 1789.69 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique forest lavender goldenrod NULL +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 +Manufacturer#4 almond antique violet mint lemon 1375.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise 
frosted 1800.7 +Manufacturer#5 almond antique sky peru orange 1788.73 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 +Manufacturer#3 almond antique chartreuse khaki white 99.68 +Manufacturer#4 almond antique gainsboro frosted violet NULL +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 +Manufacturer#3 almond antique olive coral navajo 1337.29 +Manufacturer#5 almond antique medium spring khaki 1611.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 +Manufacturer#3 almond antique misty red olive 1922.98 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 +Manufacturer#4 almond aquamarine floral ivory bisque NULL +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 +Manufacturer#3 almond antique metallic orange dim 55.39 +Manufacturer#1 almond antique burnished rose metallic 1173.15 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: 
+ enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: 
COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col0 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_retailprice (type: double) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: 
_col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col0 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col0 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here 
#### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:string:double:int:int:int:double:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr) as rn, +rank() over(partition by p_mfgr) as r, +dense_rank() over(partition by p_mfgr) as dr, +first_value(p_retailprice) over(partition by p_mfgr) as fv, +last_value(p_retailprice) over(partition by p_mfgr) as lv +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 1 1 1 1753.76 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 2 1 1 1753.76 1632.66 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 1 1 1753.76 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 4 1 1 1753.76 1632.66 
+Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 1 1 1753.76 1632.66 +Manufacturer#1 almond antique burnished rose metallic 1173.15 6 1 1 1753.76 1632.66 +Manufacturer#1 almond antique burnished rose metallic 1173.15 7 1 1 1753.76 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 8 1 1 1753.76 1632.66 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 9 1 1 1753.76 1632.66 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 10 1 1 1753.76 1632.66 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 11 1 1 1753.76 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 1 1 1753.76 1632.66 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 1 1 1 900.66 1698.66 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 2 1 1 900.66 1698.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 3 1 1 900.66 1698.66 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 1 1 900.66 1698.66 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 5 1 1 900.66 1698.66 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 6 1 1 900.66 1698.66 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7 1 1 900.66 1698.66 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 8 1 1 900.66 1698.66 +Manufacturer#3 almond antique olive coral navajo 1337.29 1 1 1 1337.29 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 2 1 1 1337.29 99.68 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3 1 1 1337.29 99.68 +Manufacturer#3 almond antique metallic orange dim 55.39 4 1 1 1337.29 99.68 +Manufacturer#3 almond antique misty red olive 1922.98 5 1 1 1337.29 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6 1 1 1337.29 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 7 1 1 1337.29 99.68 +Manufacturer#3 almond antique chartreuse khaki white 99.68 8 1 1 
1337.29 99.68 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 1 1 1 1290.35 1206.26 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 2 1 1 1290.35 1206.26 +Manufacturer#4 almond antique gainsboro frosted violet NULL 3 1 1 1290.35 1206.26 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 4 1 1 1290.35 1206.26 +Manufacturer#4 almond antique violet mint lemon 1375.42 5 1 1 1290.35 1206.26 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 6 1 1 1290.35 1206.26 +Manufacturer#5 almond antique sky peru orange 1788.73 1 1 1 1788.73 1018.1 +Manufacturer#5 almond antique blue firebrick mint 1789.69 2 1 1 1788.73 1018.1 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 3 1 1 1788.73 1018.1 +Manufacturer#5 almond antique medium spring khaki 1611.66 4 1 1 1788.73 1018.1 +Manufacturer#5 almond antique medium spring khaki 1611.66 5 1 1 1788.73 1018.1 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 6 1 1 1788.73 1018.1 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE 
Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_retailprice (type: double) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was 
here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data 
size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: rank_window_1 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: dense_rank_window_2 + arguments: _col1 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + window function definition + alias: first_value_window_3 + arguments: _col2 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: last_value_window_4 + arguments: _col2 + name: last_value + window function: GenericUDAFLastValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), row_number_window_0 (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), first_value_window_3 (type: double), last_value_window_4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:string:double:int:int:int:double:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +row_number() over(partition by p_mfgr order by p_name) as rn, +rank() over(partition by p_mfgr order by p_name) as r, +dense_rank() over(partition by p_mfgr order by p_name) as dr, +first_value(p_retailprice) over(partition by p_mfgr order by p_name) as fv, +last_value(p_retailprice) over(partition by p_mfgr order by p_name) as lv +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice rn r dr fv lv +Manufacturer#1 almond antique burnished rose metallic 
1173.15 1 1 1 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2 1 1 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 3 3 2 1173.15 1753.76 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 4 3 2 1173.15 1753.76 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 5 3 2 1173.15 1753.76 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 6 3 2 1173.15 1753.76 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 7 7 3 1173.15 1602.59 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 8 8 4 1173.15 1414.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 9 9 5 1173.15 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 10 9 5 1173.15 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 11 9 5 1173.15 1632.66 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 12 9 5 1173.15 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1 1 1 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 2 2 2 1690.68 1800.7 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 3 2 2 1690.68 1800.7 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 4 2 2 1690.68 1800.7 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 5 5 3 1690.68 2031.98 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 6 6 4 1690.68 1698.66 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 7 6 4 1690.68 1698.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 8 8 5 1690.68 1000.6 +Manufacturer#3 almond antique chartreuse khaki white 99.68 1 1 1 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 2 2 2 99.68 NULL +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3 2 2 99.68 NULL +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 4 2 2 99.68 NULL 
+Manufacturer#3 almond antique forest lavender goldenrod NULL 5 2 2 99.68 NULL +Manufacturer#3 almond antique metallic orange dim 55.39 6 6 3 99.68 55.39 +Manufacturer#3 almond antique misty red olive 1922.98 7 7 4 99.68 1922.98 +Manufacturer#3 almond antique olive coral navajo 1337.29 8 8 5 99.68 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet NULL 1 1 1 NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 2 2 2 NULL 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 3 3 3 NULL 1206.26 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 4 3 3 NULL 1206.26 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5 5 4 NULL 1844.92 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 6 6 5 NULL 1290.35 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1 1 1 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 2 2 2 1789.69 1611.66 +Manufacturer#5 almond antique medium spring khaki 1611.66 3 2 2 1789.69 1611.66 +Manufacturer#5 almond antique sky peru orange 1788.73 4 4 3 1789.69 1788.73 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 5 5 4 1789.69 1018.1 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 6 6 5 1789.69 1464.48 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: 
+ enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: 
COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, 
p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_retailprice (type: double) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string 
p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + 
alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:double:double:double:double:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, 
+min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique burnished rose metallic 1173.15 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin 
thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 12724.68 900.66 2031.98 1590.585 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique metallic orange dim 55.39 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique misty red olive 1922.98 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#3 almond antique chartreuse khaki white 99.68 6386.1500000000015 55.39 1922.98 912.307142857143 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond antique gainsboro frosted violet 
NULL 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond antique violet mint lemon 1375.42 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique sky peru orange 1788.73 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique blue firebrick mint 1789.69 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond antique medium spring khaki 1611.66 9284.32 1018.1 1789.69 1547.3866666666665 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:double + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: 
_col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, 
+max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_retailprice (type: double) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, 
double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:double +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc { string p_mfgr, string p_name, double p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1319 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc + name: default.vector_ptf_part_simple_orc + Truncated Path -> Alias: + /vector_ptf_part_simple_orc [vector_ptf_part_simple_orc] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + 
arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:double:double:double:double:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) 
over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 1173.15 2346.3 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.76 9361.34 1173.15 1753.76 1560.2233333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.59 10963.93 1173.15 1753.76 1566.2757142857142 +Manufacturer#1 almond aquamarine burnished black steel 1414.42 12378.35 1173.15 1753.76 1547.29375 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 
1570.5754545454547 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.66 17276.33 1173.15 1753.76 1570.5754545454547 +Manufacturer#2 almond antique violet chocolate turquoise 1690.68 1690.68 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond antique violet turquoise frosted 1800.7 7092.780000000001 1690.68 1800.7 1773.1950000000002 +Manufacturer#2 almond aquamarine midnight light salmon 2031.98 9124.76 1690.68 2031.98 1824.952 +Manufacturer#2 almond aquamarine rose maroon antique 900.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine rose maroon antique 1698.66 11724.08 900.66 2031.98 1674.8685714285714 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.6 12724.68 900.66 2031.98 1590.585 +Manufacturer#3 almond antique chartreuse khaki white 99.68 99.68 99.68 99.68 99.68 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 590.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod 1190.27 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.49 99.68 1190.27 767.6225 +Manufacturer#3 almond antique metallic orange dim 55.39 3125.8799999999997 55.39 1190.27 625.1759999999999 +Manufacturer#3 almond antique misty red olive 1922.98 5048.86 55.39 1922.98 841.4766666666666 +Manufacturer#3 almond antique olive coral navajo 1337.29 6386.15 55.39 1922.98 912.3071428571428 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.42 1375.42 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.6800000000003 1206.26 1375.42 
1290.8400000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.26 2581.6800000000003 1206.26 1375.42 1290.8400000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.92 4426.6 1206.26 1844.92 1475.5333333333335 +Manufacturer#4 almond azure aquamarine papaya violet 1290.35 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#5 almond antique blue firebrick mint 1789.69 1789.69 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique medium spring khaki 1611.66 5013.01 1611.66 1789.69 1671.0033333333333 +Manufacturer#5 almond antique sky peru orange 1788.73 6801.74 1611.66 1789.69 1700.435 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.1 7819.84 1018.1 1789.69 1563.968 +Manufacturer#5 almond azure blanched chiffon midnight 1464.48 9284.32 1018.1 1789.69 1547.3866666666665 +PREHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: create table vector_ptf_part_simple_text_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '\t' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vector_ptf_part_simple_text_decimal +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vector_ptf_part_simple.txt' OVERWRITE INTO TABLE vector_ptf_part_simple_text_decimal 
+POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vector_ptf_part_simple_text_decimal +PREHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: create table vector_ptf_part_simple_orc_decimal(p_mfgr string, p_name string, p_retailprice decimal(38,18)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_decimal SELECT * FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_decimal +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_decimal.p_retailprice SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +vector_ptf_part_simple_text_decimal.p_mfgr vector_ptf_part_simple_text_decimal.p_name vector_ptf_part_simple_text_decimal.p_retailprice +PREHOOK: query: explain vectorization detail +select 
p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: 
decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + tag: -1 
+ value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_decimal + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_decimal + name: default.vector_ptf_part_simple_orc_decimal + 
Truncated Path -> Alias: + /vector_ptf_part_simple_orc_decimal [vector_ptf_part_simple_orc_decimal] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18) + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr) as s, +min(p_retailprice) over(partition by p_mfgr) as mi, +max(p_retailprice) over(partition by p_mfgr) as ma, +avg(p_retailprice) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 
+Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond aquamarine rose maroon antique 
900.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod NULL 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 
912.307142857142857143 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique gainsboro frosted violet NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 9284.320000000000000000 1018.100000000000000000 
1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + 
TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + value expressions: p_retailprice (type: decimal(38,18)) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_retailprice:decimal(38,18) + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw 
input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from 
vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_decimal + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_retailprice (type: decimal(38,18)) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_decimal + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_retailprice + columns.comments + columns.types string:string:decimal(38,18) +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_decimal + numFiles 1 + numRows 40 + rawDataSize 12792 + serialization.ddl struct vector_ptf_part_simple_orc_decimal { string p_mfgr, string p_name, decimal(38,18) p_retailprice} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1283 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_decimal + name: default.vector_ptf_part_simple_orc_decimal + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_decimal [vector_ptf_part_simple_orc_decimal] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: decimal(38,18) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + 
alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(38,18)), sum_window_0 (type: decimal(38,18)), min_window_1 (type: decimal(38,18)), max_window_2 (type: decimal(38,18)), avg_window_3 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 12792 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18):decimal(38,18) + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, 
p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_retailprice, +sum(p_retailprice) over(partition by p_mfgr order by p_name) as s, +min(p_retailprice) over(partition by p_mfgr order by p_name) as mi, +max(p_retailprice) over(partition by p_mfgr order by p_name) as ma, +avg(p_retailprice) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_decimal +#### A masked pattern was here #### +p_mfgr p_name p_retailprice s mi ma av +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique burnished rose metallic 1173.150000000000000000 2346.300000000000000000 1173.150000000000000000 1173.150000000000000000 1173.150000000000000000 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 1753.760000000000000000 9361.340000000000000000 1173.150000000000000000 
1753.760000000000000000 1560.223333333333333333 +Manufacturer#1 almond antique salmon chartreuse burlywood 1602.590000000000000000 10963.930000000000000000 1173.150000000000000000 1753.760000000000000000 1566.275714285714285714 +Manufacturer#1 almond aquamarine burnished black steel 1414.420000000000000000 12378.350000000000000000 1173.150000000000000000 1753.760000000000000000 1547.293750000000000000 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#1 almond aquamarine pink moccasin thistle 1632.660000000000000000 17276.330000000000000000 1173.150000000000000000 1753.760000000000000000 1570.575454545454545455 +Manufacturer#2 almond antique violet chocolate turquoise 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 1690.680000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond antique violet turquoise frosted 1800.700000000000000000 7092.780000000000000000 1690.680000000000000000 1800.700000000000000000 1773.195000000000000000 +Manufacturer#2 almond aquamarine midnight light salmon 2031.980000000000000000 9124.760000000000000000 1690.680000000000000000 2031.980000000000000000 1824.952000000000000000 +Manufacturer#2 almond 
aquamarine rose maroon antique 900.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine rose maroon antique 1698.660000000000000000 11724.080000000000000000 900.660000000000000000 2031.980000000000000000 1674.868571428571428571 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1000.600000000000000000 12724.680000000000000000 900.660000000000000000 2031.980000000000000000 1590.585000000000000000 +Manufacturer#3 almond antique chartreuse khaki white 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 99.680000000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 590.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod 1190.270000000000000000 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique forest lavender goldenrod NULL 3070.490000000000000000 99.680000000000000000 1190.270000000000000000 767.622500000000000000 +Manufacturer#3 almond antique metallic orange dim 55.390000000000000000 3125.880000000000000000 55.390000000000000000 1190.270000000000000000 625.176000000000000000 +Manufacturer#3 almond antique misty red olive 1922.980000000000000000 5048.860000000000000000 55.390000000000000000 1922.980000000000000000 841.476666666666666667 +Manufacturer#3 almond antique olive coral navajo 1337.290000000000000000 6386.150000000000000000 55.390000000000000000 1922.980000000000000000 912.307142857142857143 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 1375.420000000000000000 
1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 1375.420000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine floral ivory bisque 1206.260000000000000000 2581.680000000000000000 1206.260000000000000000 1375.420000000000000000 1290.840000000000000000 +Manufacturer#4 almond aquamarine yellow dodger mint 1844.920000000000000000 4426.600000000000000000 1206.260000000000000000 1844.920000000000000000 1475.533333333333333333 +Manufacturer#4 almond azure aquamarine papaya violet 1290.350000000000000000 5716.950000000000000000 1206.260000000000000000 1844.920000000000000000 1429.237500000000000000 +Manufacturer#5 almond antique blue firebrick mint 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 1789.690000000000000000 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique medium spring khaki 1611.660000000000000000 5013.010000000000000000 1611.660000000000000000 1789.690000000000000000 1671.003333333333333333 +Manufacturer#5 almond antique sky peru orange 1788.730000000000000000 6801.740000000000000000 1611.660000000000000000 1789.690000000000000000 1700.435000000000000000 +Manufacturer#5 almond aquamarine dodger light gainsboro 1018.100000000000000000 7819.840000000000000000 1018.100000000000000000 1789.690000000000000000 1563.968000000000000000 +Manufacturer#5 almond azure blanched chiffon midnight 1464.480000000000000000 9284.320000000000000000 1018.100000000000000000 1789.690000000000000000 1547.386666666666666667 +PREHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: create table vector_ptf_part_simple_orc_long(p_mfgr string, p_name string, p_bigint bigint) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +PREHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_text_decimal +PREHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: query: INSERT INTO TABLE vector_ptf_part_simple_orc_long SELECT p_mfgr, p_name, cast(p_retailprice * 100 as bigint) FROM vector_ptf_part_simple_text_decimal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_text_decimal +POSTHOOK: Output: default@vector_ptf_part_simple_orc_long +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_bigint EXPRESSION [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_retailprice, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_mfgr SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: vector_ptf_part_simple_orc_long.p_name SIMPLE [(vector_ptf_part_simple_text_decimal)vector_ptf_part_simple_text_decimal.FieldSchema(name:p_name, type:string, comment:null), ] +p_mfgr p_name _c2 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, 
+min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string) + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_bigint (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_name (type: string), p_bigint (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_long + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + 
columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_long + name: default.vector_ptf_part_simple_orc_long + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_long [vector_ptf_part_simple_orc_long] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw 
input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:bigint:bigint:bigint:bigint:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 
+ GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr) as s, +min(p_bigint) over(partition by p_mfgr) as mi, +max(p_bigint) over(partition by p_mfgr) as ma, +avg(p_bigint) over(partition by p_mfgr) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique burnished rose metallic 117315 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine burnished black steel 141442 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 1727633 117315 175376 157057.54545454544 
+Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet chocolate turquoise 169068 1272468 90066 203198 159058.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 1272468 90066 203198 159058.5 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 59027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod NULL 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique metallic orange dim 5539 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique misty red olive 192298 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique forest lavender goldenrod 119027 638615 5539 192298 91230.71428571429 +Manufacturer#3 almond antique chartreuse khaki white 9968 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 571695 120626 184492 142923.75 +Manufacturer#4 almond antique gainsboro frosted violet NULL 571695 120626 184492 
142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 571695 120626 184492 142923.75 +Manufacturer#4 almond antique violet mint lemon 137542 571695 120626 184492 142923.75 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 571695 120626 184492 142923.75 +Manufacturer#5 almond antique sky peru orange 178873 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique blue firebrick mint 178969 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond antique medium spring khaki 161166 928432 101810 178969 154738.66666666666 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 928432 101810 178969 154738.66666666666 +PREHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num 
rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2] + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + value expressions: p_bigint (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: p_mfgr:string, p_name:string, p_bigint:bigint + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + 
partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY 
+POSTHOOK: query: explain extended +select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vector_ptf_part_simple_orc_long + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_bigint (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: vector_ptf_part_simple_orc_long + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns p_mfgr,p_name,p_bigint + columns.comments + columns.types string:string:bigint +#### A masked pattern was here #### + name default.vector_ptf_part_simple_orc_long + numFiles 1 + numRows 40 + rawDataSize 9048 + serialization.ddl struct vector_ptf_part_simple_orc_long { string p_mfgr, string p_name, i64 p_bigint} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1205 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.vector_ptf_part_simple_orc_long + name: default.vector_ptf_part_simple_orc_long + Truncated Path -> Alias: + /vector_ptf_part_simple_orc_long [vector_ptf_part_simple_orc_long] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumLong + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: min_window_1 + arguments: _col2 + name: min + window function: GenericUDAFMinEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: 
max_window_2 + arguments: _col2 + name: max + window function: GenericUDAFMaxEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + window function definition + alias: avg_window_3 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), sum_window_0 (type: bigint), min_window_1 (type: bigint), max_window_2 (type: bigint), avg_window_3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 9048 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string:string:bigint:bigint:bigint:bigint:double + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av 
+from vector_ptf_part_simple_orc_long +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr,p_name, p_bigint, +sum(p_bigint) over(partition by p_mfgr order by p_name) as s, +min(p_bigint) over(partition by p_mfgr order by p_name) as mi, +max(p_bigint) over(partition by p_mfgr order by p_name) as ma, +avg(p_bigint) over(partition by p_mfgr order by p_name) as av +from vector_ptf_part_simple_orc_long +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_ptf_part_simple_orc_long +#### A masked pattern was here #### +p_mfgr p_name p_bigint s mi ma av +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique burnished rose metallic 117315 234630 117315 117315 117315.0 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique chartreuse lavender yellow 175376 936134 117315 175376 156022.33333333334 +Manufacturer#1 almond antique salmon chartreuse burlywood 160259 1096393 117315 175376 156627.57142857142 +Manufacturer#1 almond aquamarine burnished black steel 141442 1237835 117315 175376 154729.375 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle NULL 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#1 almond aquamarine pink moccasin thistle 163266 1727633 117315 175376 157057.54545454544 +Manufacturer#2 almond antique violet chocolate turquoise 169068 169068 169068 169068 169068.0 +Manufacturer#2 almond antique 
violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond antique violet turquoise frosted 180070 709278 169068 180070 177319.5 +Manufacturer#2 almond aquamarine midnight light salmon 203198 912476 169068 203198 182495.2 +Manufacturer#2 almond aquamarine rose maroon antique 90066 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine rose maroon antique 169866 1172408 90066 203198 167486.85714285713 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 100060 1272468 90066 203198 159058.5 +Manufacturer#3 almond antique chartreuse khaki white 9968 9968 9968 9968 9968.0 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 59027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod 119027 307049 9968 119027 76762.25 +Manufacturer#3 almond antique forest lavender goldenrod NULL 307049 9968 119027 76762.25 +Manufacturer#3 almond antique metallic orange dim 5539 312588 5539 119027 62517.6 +Manufacturer#3 almond antique misty red olive 192298 504886 5539 192298 84147.66666666667 +Manufacturer#3 almond antique olive coral navajo 133729 638615 5539 192298 91230.71428571429 +Manufacturer#4 almond antique gainsboro frosted violet NULL NULL NULL NULL NULL +Manufacturer#4 almond antique violet mint lemon 137542 137542 137542 137542 137542.0 +Manufacturer#4 almond aquamarine floral ivory bisque NULL 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine floral ivory bisque 120626 258168 120626 137542 129084.0 +Manufacturer#4 almond aquamarine yellow dodger mint 184492 442660 120626 184492 147553.33333333334 +Manufacturer#4 almond azure aquamarine papaya violet 129035 571695 120626 184492 142923.75 +Manufacturer#5 almond antique blue firebrick mint 178969 178969 178969 178969 178969.0 +Manufacturer#5 almond 
antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique medium spring khaki 161166 501301 161166 178969 167100.33333333334 +Manufacturer#5 almond antique sky peru orange 178873 680174 161166 178969 170043.5 +Manufacturer#5 almond aquamarine dodger light gainsboro 101810 781984 101810 178969 156396.8 +Manufacturer#5 almond azure blanched chiffon midnight 146448 928432 101810 178969 154738.66666666666 diff --git ql/src/test/results/clientpositive/windowing_gby2.q.out ql/src/test/results/clientpositive/windowing_gby2.q.out index adb3296..163d981 100644 --- ql/src/test/results/clientpositive/windowing_gby2.q.out +++ ql/src/test/results/clientpositive/windowing_gby2.q.out @@ -86,7 +86,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -211,7 +211,7 @@ STAGE PLANS: arguments: _col0 name: avg window function: GenericUDAFAverageEvaluatorDouble - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: avg_window_0 (type: double) @@ -338,7 +338,7 @@ STAGE PLANS: arguments: _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -385,7 +385,7 @@ STAGE PLANS: arguments: _col4 name: dense_rank window function: GenericUDAFDenseRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: 
NONE Select Operator @@ -432,7 +432,7 @@ STAGE PLANS: arguments: _col7 name: percent_rank window function: GenericUDAFPercentRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -615,7 +615,7 @@ STAGE PLANS: arguments: (UDFToDouble(_col1) / UDFToDouble(_col2)) name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/windowing_streaming.q.out ql/src/test/results/clientpositive/windowing_streaming.q.out index a4bbef2..8d1071f 100644 --- ql/src/test/results/clientpositive/windowing_streaming.q.out +++ ql/src/test/results/clientpositive/windowing_streaming.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -162,7 +162,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -362,7 +362,7 @@ STAGE PLANS: arguments: _col5 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator diff --git 
ql/src/test/results/clientpositive/windowing_windowspec.q.out ql/src/test/results/clientpositive/windowing_windowspec.q.out index 52fe871..9da6183 100644 --- ql/src/test/results/clientpositive/windowing_windowspec.q.out +++ ql/src/test/results/clientpositive/windowing_windowspec.q.out @@ -800,6 +800,114 @@ POSTHOOK: Input: default@over10k 71.68 722.6499947607517 79.46 802.1099938452244 80.02 882.1299904882908 +PREHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +3.17 NULL +10.89 3.1700000762939453 +14.54 14.0600004196167 +14.78 25.43000030517578 +17.85 29.31999969482422 +20.61 32.63000011444092 +28.69 38.46000099182129 +29.22 49.30000114440918 +31.17 57.90999984741211 +38.35 60.38999938964844 +38.61 69.51999855041504 +39.48 76.95999908447266 +40.54 78.09000015258789 +41.6 80.02000045776367 +46.08 82.13999938964844 +54.36 87.68000030517578 +56.94 100.44000244140625 +64.96 111.29999923706055 +73.52 121.89999771118164 +78.58 138.47999572753906 +81.41 152.0999984741211 +84.71 159.99000549316406 +87.43 166.12000274658203 +91.36 172.13999938964844 +92.96 178.79000091552734 +95.04 184.31999969482422 +0.83 NULL +1.99 0.8299999833106995 +3.73 2.8199999928474426 +8.86 5.7200000286102295 +10.62 12.589999675750732 +11.32 19.479999542236328 +12.83 21.9399995803833 +14.7 24.149999618530273 +14.96 27.52999973297119 +17.58 29.65999984741211 +19.1 32.53999996185303 +21.01 36.68000030517578 +26.95 40.11000061035156 +27.23 47.96000099182129 +29.07 54.18000030517578 +29.71 56.29999923706055 +31.84 58.779998779296875 +31.94 61.54999923706055 +35.32 63.78000068664551 +37.32 67.26000022888184 
+38.5 72.63999938964844 +42.08 75.81999969482422 +44.3 80.58000183105469 +44.66 86.38000106811523 +46.84 88.95999908447266 +48.89 91.5 +49.64 95.72999954223633 +50.28 98.52999877929688 +52.09 99.91999816894531 +53.26 102.36999893188477 +54.09 105.3499984741211 +56.45 107.3499984741211 +56.76 110.54000091552734 +61.41 113.20999908447266 +61.88 118.16999816894531 +63.03 123.29000091552734 +64.55 124.90999984741211 +68.62 127.58000183105469 +76.13 133.17000579833984 +79.05 144.75 +80.43 155.18000030517578 +81.41 159.4800033569336 +82.85 161.84000396728516 +83.98 164.26000213623047 +84.21 166.8300018310547 +85.55 168.19000244140625 +87.93 169.76000213623047 +88.93 173.4800033569336 +94.27 176.86000061035156 +99.45 183.1999969482422 +0.36 NULL +0.48 0.36000001430511475 +0.79 0.8400000035762787 +1.27 1.270000010728836 +4.48 2.060000002384186 +9.0 5.75 +23.27 13.480000019073486 +25.13 32.27000045776367 +25.34 48.39999961853027 +25.91 50.46999931335449 +29.01 51.25 +30.47 54.920000076293945 +37.95 59.47999954223633 +39.3 68.42000007629395 +45.91 77.25 +52.44 85.20999908447266 +54.1 98.3499984741211 +56.7 106.53999710083008 +58.77 110.79999923706055 +62.09 115.47000122070312 +68.2 120.86000061035156 +71.68 130.28999710083008 +79.46 139.87999725341797 +80.02 151.13999938964844 PREHOOK: query: select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7 PREHOOK: type: QUERY PREHOOK: Input: default@over10k