diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java index 46ee261..b897d85 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java @@ -97,15 +97,14 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc return; } } HiveDecimalWritable[] vector = decimalColVector.vector; + + final HiveDecimalWritable firstValue = vector[i++]; if (isGroupResultNull) { - max.set(vector[i++]); + max.set(firstValue); isGroupResultNull = false; - } else { - final HiveDecimalWritable dec = vector[i++]; - if (dec.compareTo(max) == 1) { - max.set(dec); - } + } else if (firstValue.compareTo(max) == 1) { + max.set(firstValue); } for (; i < size; i++) { if (!batchIsNull[i]) { @@ -133,11 +133,9 @@ public HiveDecimalWritable getDecimalGroupResult() { return max; } - private static HiveDecimal MIN_VALUE = HiveDecimal.create("-99999999999999999999999999999999999999"); - @Override public void resetEvaluator() { isGroupResultNull = true; - max.set(MIN_VALUE); + max.setFromLong(0); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java index f881356..ae97b3c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java @@ -98,14 +98,13 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } } HiveDecimalWritable[] vector = decimalColVector.vector; + + final HiveDecimalWritable firstValue = vector[i++]; if (isGroupResultNull) { - min.set(vector[i++]); + min.set(firstValue); isGroupResultNull = false; - } else { - final HiveDecimalWritable dec = vector[i++]; - if (dec.compareTo(min) == -1) { - min.set(dec); - } + } else if (firstValue.compareTo(min) == -1) { + min.set(firstValue); } for (; i < size; i++) { if (!batchIsNull[i]) { @@ -133,11 +132,9 @@ public HiveDecimalWritable getDecimalGroupResult() { return min; } - private static HiveDecimal MAX_VALUE = HiveDecimal.create("99999999999999999999999999999999999999"); - @Override public void resetEvaluator() { isGroupResultNull = true; - min.set(MAX_VALUE); + min.setFromLong(0); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java index 50280d9..2aaa62c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java @@ -64,7 +64,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc isGroupResultNull = false; } else { final double repeatedMax = doubleColVector.vector[0]; - if (repeatedMax < max) { + if (repeatedMax > max) { max = repeatedMax; } } @@ -129,6 +129,6 @@ public double getDoubleGroupResult() { @Override public void resetEvaluator() { isGroupResultNull = true; - max = Double.MIN_VALUE; + max = 0.0; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java index 24788af..ad84ad44 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java @@ -129,6 +129,6 @@ public double getDoubleGroupResult() { @Override public void resetEvaluator() { isGroupResultNull = true; - min = Double.MAX_VALUE; + min = 0.0; } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalAvg.java new file mode 100644 index 0000000..5320afc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalAvg.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates streaming HiveDecimal avg() for a PTF group. + * + * Stream average non-null column values and output sum / non-null count as we go along. + */ +public class VectorPTFEvaluatorStreamingDecimalAvg extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected HiveDecimalWritable sum; + private int nonNullGroupCount; + private HiveDecimalWritable temp; + private HiveDecimalWritable avg; + + public VectorPTFEvaluatorStreamingDecimalAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + sum = new HiveDecimalWritable(); + temp = new HiveDecimalWritable(); + avg = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null decimal column values for avg and output the running average + // (sum / non-null count) for each row; maintain isNull. + + // We do not filter when PTF is in reducer.
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + if (decimalColVector.isRepeating) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + HiveDecimalWritable repeatedValue = decimalColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum.mutateAdd(repeatedValue); + nonNullGroupCount++; + + // Output row i AVG. + avg.set(sum); + temp.setFromLong(nonNullGroupCount); + avg.mutateDivide(temp); + outputColVector.set(i, avg); + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputColVector.set(0, avg); + } + outputColVector.isRepeating = true; + } + } else if (decimalColVector.noNulls) { + isNull = false; + HiveDecimalWritable[] vector = decimalColVector.vector; + for (int i = 0; i < size; i++) { + sum.mutateAdd(vector[i]); + nonNullGroupCount++; + + // Output row i AVG. + avg.set(sum); + temp.setFromLong(nonNullGroupCount); + avg.mutateDivide(temp); + outputColVector.set(i, avg); + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputColVector.set(i, avg); + } + if (++i >= size) { + return; + } + } + + isNull = false; + HiveDecimalWritable[] vector = decimalColVector.vector; + + sum.mutateAdd(vector[i]); + nonNullGroupCount++; + + // Output row i AVG. + avg.set(sum); + temp.setFromLong(nonNullGroupCount); + avg.mutateDivide(temp); + + outputColVector.set(i++, avg); + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum.mutateAdd(vector[i]); + nonNullGroupCount++; + + avg.set(sum); + temp.setFromLong(nonNullGroupCount); + avg.mutateDivide(temp); + + // Output row i AVG. + outputColVector.set(i, avg); + } else { + + // Continue previous AVG. + outputColVector.set(i, avg); + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum.set(HiveDecimal.ZERO); + nonNullGroupCount = 0; + avg.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMax.java new file mode 100644 index 0000000..a79f46d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMax.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal max() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDecimalMax extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected HiveDecimalWritable max; + + public VectorPTFEvaluatorStreamingDecimalMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + max = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine maximum of all non-null decimal column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + if (decimalColVector.isRepeating) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { + + HiveDecimalWritable repeatedMax = decimalColVector.vector[0]; + if (isNull) { + max.set(repeatedMax); + isNull = false; + } else if (repeatedMax.compareTo(max) == 1) { + max.set(repeatedMax); + } + outputColVector.set(0, max); + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputColVector.set(0, max); + } + outputColVector.isRepeating = true; + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + for (int i = 0; i < size; i++) { + final HiveDecimalWritable value = vector[i]; + if (isNull) { + max.set(value); + isNull = false; + } else if (value.compareTo(max) == 1) { + max.set(value); + } + outputColVector.set(i, max); + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. 
+ outputColVector.set(i, max); + } + if (++i >= size) { + return; + } + } + + HiveDecimalWritable[] vector = decimalColVector.vector; + + final HiveDecimalWritable firstValue = vector[i]; + if (isNull) { + max.set(firstValue); + isNull = false; + } else if (firstValue.compareTo(max) == 1) { + max.set(firstValue); + } + + outputColVector.set(i++, max); + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final HiveDecimalWritable value = vector[i]; + if (isNull) { + max.set(value); + isNull = false; + } else if (value.compareTo(max) == 1) { + max.set(value); + } + outputColVector.set(i, max); + } else { + + // Continue previous MAX. + outputColVector.set(i, max); + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public void resetEvaluator() { + isNull = true; + max.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMin.java new file mode 100644 index 0000000..e9b2bf2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMin.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal min() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDecimalMin extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected HiveDecimalWritable min; + + public VectorPTFEvaluatorStreamingDecimalMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + min = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine minimum of all non-null decimal column values; maintain isNull. + + // We do not filter when PTF is in reducer. 
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + if (decimalColVector.isRepeating) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { + + HiveDecimalWritable repeatedMin = decimalColVector.vector[0]; + if (isNull) { + min.set(repeatedMin); + isNull = false; + } else if (repeatedMin.compareTo(min) == -1) { + min.set(repeatedMin); + } + outputColVector.set(0, min); + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputColVector.set(0, min); + } + outputColVector.isRepeating = true; + } else if (decimalColVector.noNulls) { + HiveDecimalWritable[] vector = decimalColVector.vector; + for (int i = 0; i < size; i++) { + final HiveDecimalWritable value = vector[i]; + if (isNull) { + min.set(value); + isNull = false; + } else if (value.compareTo(min) == -1) { + min.set(value); + } + outputColVector.set(i, min); + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputColVector.set(i, min); + } + if (++i >= size) { + return; + } + } + + HiveDecimalWritable[] vector = decimalColVector.vector; + + final HiveDecimalWritable firstValue = vector[i]; + if (isNull) { + min.set(firstValue); + isNull = false; + } else if (firstValue.compareTo(min) == -1) { + min.set(firstValue); + } + + outputColVector.set(i++, min); + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final HiveDecimalWritable value = vector[i]; + if (isNull) { + min.set(value); + isNull = false; + } else if (value.compareTo(min) == -1) { + min.set(value); + } + outputColVector.set(i, min); + } else { + + // Continue previous MIN. + outputColVector.set(i, min); + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public void resetEvaluator() { + isNull = true; + min.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalSum.java new file mode 100644 index 0000000..e727087 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalSum.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates HiveDecimal sum() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDecimalSum extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected HiveDecimalWritable sum; + + public VectorPTFEvaluatorStreamingDecimalSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + sum = new HiveDecimalWritable(); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null decimal column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); + + DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + + if (decimalColVector.isRepeating) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + HiveDecimalWritable repeatedValue = decimalColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum.mutateAdd(repeatedValue); + + // Output row i SUM. + outputColVector.set(i, sum); + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputColVector.set(0, sum); + } + outputColVector.isRepeating = true; + } + } else if (decimalColVector.noNulls) { + isNull = false; + HiveDecimalWritable[] vector = decimalColVector.vector; + for (int i = 0; i < size; i++) { + sum.mutateAdd(vector[i]); + + // Output row i sum. + outputColVector.set(i, sum); + } + } else { + boolean[] batchIsNull = decimalColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputColVector.set(i, sum); + } + if (++i >= size) { + return; + } + } + + isNull = false; + HiveDecimalWritable[] vector = decimalColVector.vector; + + sum.mutateAdd(vector[i]); + + // Output row i sum. + outputColVector.set(i++, sum); + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum.mutateAdd(vector[i]); + outputColVector.set(i, sum); + } else { + + // Continue previous SUM. + outputColVector.set(i, sum); + } + } + } + } + + public boolean streamsResult() { + // No group value.
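+ // The running sum has already been written to the output column for every row above, + // so the PTF operator does not need a separate per-group result from this evaluator.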
+ return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DECIMAL; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum.set(HiveDecimal.ZERO); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleAvg.java new file mode 100644 index 0000000..e0bb76d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleAvg.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double avg() for a PTF group. + * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorStreamingDoubleAvg extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected double sum; + private int nonNullGroupCount; + protected double avg; + + public VectorPTFEvaluatorStreamingDoubleAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null double column values for avg and output the running average + // (sum / non-null count) for each row; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { + + // We have a repeated value.
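+ // Even though the input value repeats, the running average still advances on every row, + // so each output row receives a different value.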
+ isNull = false; + final double repeatedValue = doubleColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputVector[0] = avg; + } + outputColVector.isRepeating = true; + } + } else if (doubleColVector.noNulls) { + isNull = false; + double[] vector = doubleColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputVector[i] = avg; + } + if (++i >= size) { + return; + } + } + + isNull = false; + double[] vector = doubleColVector.vector; + + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i++] = avg; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i average. + outputVector[i] = avg; + } else { + + // Continue previous AVG. + outputVector[i] = avg; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0.0; + nonNullGroupCount = 0; + avg = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMax.java new file mode 100644 index 0000000..6117122 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMax.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double max() for a PTF group. 
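+ * + * The running maximum is written to the output column for every row (streaming) rather + * than once per group.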
+ */ +public class VectorPTFEvaluatorStreamingDoubleMax extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected double max; + + public VectorPTFEvaluatorStreamingDoubleMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine maximum of all non-null double column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final double repeatedMax = doubleColVector.vector[0]; + + if (isNull) { + max = repeatedMax; + isNull = false; + } else if (repeatedMax > max) { + max = repeatedMax; + } + outputVector[0] = max; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[0] = max; + } + outputColVector.isRepeating = true; + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + for (int i = 0; i < size; i++) { + final double value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + if (++i >= size) { + return; + } + } + + double[] vector = doubleColVector.vector; + + final double firstValue = vector[i]; + if (isNull) { + max = firstValue; + isNull = false; + } else if (firstValue > max) { + max = firstValue; + } + + // Output row i max. + outputVector[i++] = max; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMin.java new file mode 100644 index 0000000..30d5a80 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMin.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double min() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDoubleMin extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected double min; + + public VectorPTFEvaluatorStreamingDoubleMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine minimum of all non-null double column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final double repeatedMin = doubleColVector.vector[0]; + + if (isNull) { + min = repeatedMin; + isNull = false; + } else if (repeatedMin < min) { + min = repeatedMin; + } + outputVector[0] = min; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[0] = min; + } + outputColVector.isRepeating = true; + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + for (int i = 0; i < size; i++) { + final double value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + outputVector[i] = min; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + if (++i >= size) { + return; + } + } + + double[] vector = doubleColVector.vector; + + final double firstValue = vector[i]; + if (isNull) { + min = firstValue; + isNull = false; + } else if (firstValue < min) { + min = firstValue; + } + + // Output row i min. 
+ outputVector[i++] = min; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + + // Output row i min. + outputVector[i] = min; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleSum.java new file mode 100644 index 0000000..9c4f415 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleSum.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double sum() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDoubleSum extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected double sum; + + public VectorPTFEvaluatorStreamingDoubleSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { + + // We have a repeated value. 
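+ // The repeated value still advances the running sum once per row, so each output row differs.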
+ isNull = false; + final double repeatedValue = doubleColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + + // Output row i SUM. + outputVector[i] = sum; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[0] = sum; + } + outputColVector.isRepeating = true; + } + } else if (doubleColVector.noNulls) { + isNull = false; + double[] vector = doubleColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + + // Output row i SUM. + outputVector[i] = sum; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + if (++i >= size) { + return; + } + } + + isNull = false; + double[] vector = doubleColVector.vector; + + sum += vector[i]; + + // Output row i sum. + outputVector[i++] = sum; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java new file mode 100644 index 0000000..b777ccd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long avg() for a PTF group. + * + * Sum up non-null column values; group result is sum / non-null count.
+ */ +public class VectorPTFEvaluatorStreamingLongAvg extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + private int nonNullGroupCount; + protected double avg; + + public VectorPTFEvaluatorStreamingLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null long column values for avg and output the running average + // (sum / non-null count) for each row; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + final long repeatedValue = longColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + nonNullGroupCount++; + + avg = ((double) sum) / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputVector[0] = avg; + } + outputColVector.isRepeating = true; + } + } else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + nonNullGroupCount++; + + avg = ((double) sum) / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous AVG. + outputVector[i] = avg; + } + if (++i >= size) { + return; + } + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + nonNullGroupCount++; + + avg = ((double) sum) / nonNullGroupCount; + + // Output row i AVG. + outputVector[i++] = avg; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + nonNullGroupCount++; + + avg = ((double) sum) / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; + } else { + + // Continue previous AVG. + outputVector[i] = avg; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + nonNullGroupCount = 0; + avg = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java new file mode 100644 index 0000000..db7b791 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long max() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongMax extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long max; + + public VectorPTFEvaluatorStreamingLongMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine maximum of all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final long repeatedMax = longColVector.vector[0]; + + if (isNull) { + max = repeatedMax; + isNull = false; + } else if (repeatedMax > max) { + max = repeatedMax; + } + outputVector[0] = max; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[0] = max; + } + outputColVector.isRepeating = true; + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + final long value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + if (++i >= size) { + return; + } + } + + long[] vector = longColVector.vector; + + final long firstValue = vector[i]; + if (isNull) { + max = firstValue; + isNull = false; + } else if (firstValue > max) { + max = firstValue; + } + + // Output row i max. 
+ outputVector[i++] = max; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java new file mode 100644 index 0000000..6fade89 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long min() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongMin extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long min; + + public VectorPTFEvaluatorStreamingLongMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine minimum of all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. 
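+ // A repeated batch cannot change the minimum after its first value, so evaluate once and + // mark the output column repeating.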
+ final long repeatedMin = longColVector.vector[0]; + + if (isNull) { + min = repeatedMin; + isNull = false; + } else if (repeatedMin < min) { + min = repeatedMin; + } + outputVector[0] = min; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[0] = min; + } + outputColVector.isRepeating = true; + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + final long value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + outputVector[i] = min; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + if (++i >= size) { + return; + } + } + + long[] vector = longColVector.vector; + + final long firstValue = vector[i]; + if (isNull) { + min = firstValue; + isNull = false; + } else if (firstValue < min) { + min = firstValue; + } + + // Output row i min. + outputVector[i++] = min; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + + // Output row i min. + outputVector[i] = min; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + } + } + } + + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java new file mode 100644 index 0000000..c96bf12 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long sum() for a PTF group. 
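+ * + * The running sum is written to the output column for every row (streaming) rather + * than once per group.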
+ */ +public class VectorPTFEvaluatorStreamingLongSum extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + + public VectorPTFEvaluatorStreamingLongSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + final long repeatedValue = longColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + + // Output row i sum. + outputVector[i] = sum; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[0] = sum; + } + outputColVector.isRepeating = true; + } + } else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + if (++i >= size) { + return; + } + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + + // Output row i sum. + outputVector[i++] = sum; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + } + } + } + + public boolean streamsResult() { + // No group value. 
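+ // Results are streamed into the output column row by row during evaluateGroupBatch, so there is no deferred group-level result to return.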
+ return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index a6a3417..ae9faa6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -228,6 +228,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.mapred.InputFormat; @@ -2823,6 +2824,18 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex setOperatorIssue(functionName + " only UNBOUNDED start frame is supported"); return false; } + List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i]; + final boolean isSingleParameter = + (exprNodeDescList != null && + exprNodeDescList.size() == 1); + final ExprNodeDesc singleExprNodeDesc = + (isSingleParameter ? exprNodeDescList.get(0) : null); + final TypeInfo singleTypeInfo = + (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null); + final PrimitiveCategory singlePrimitiveCategory = + (singleTypeInfo instanceof PrimitiveTypeInfo ? + ((PrimitiveTypeInfo) singleTypeInfo).getPrimitiveCategory() : null); + switch (windowFrameDef.getWindowType()) { case RANGE: if (!windowFrameDef.getEnd().isCurrentRow()) { @@ -2831,15 +2844,25 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex } break; case ROWS: - if (!windowFrameDef.isEndUnbounded()) { - setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type"); - return false; + { + boolean isRowEndCurrent = + (windowFrameDef.getEnd().isCurrentRow() && + (supportedFunctionType == SupportedFunctionType.AVG || + supportedFunctionType == SupportedFunctionType.MAX || + supportedFunctionType == SupportedFunctionType.MIN || + supportedFunctionType == SupportedFunctionType.SUM) && + isSingleParameter && + singlePrimitiveCategory != null); + if (!isRowEndCurrent && !windowFrameDef.isEndUnbounded()) { + setOperatorIssue( + functionName + " UNBOUNDED end frame is required for ROWS window type"); + return false; + } } break; default: throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType()); } - List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i]; if (exprNodeDescList != null && exprNodeDescList.size() > 1) { setOperatorIssue("More than 1 argument expression of aggregation function " + functionName); return false; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java index 830b8c8..53886fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java @@ -50,6 +50,19 @@ import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongSum; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRowNumber; +import 
org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleSum; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongAvg; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMax; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMin; +import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongSum; +import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -138,27 +151,46 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function WindowFrameDef windowFrameDef, Type columnVectorType, VectorExpression inputVectorExpression, int outputColumnNum) { + final boolean isRowEndCurrent = + (windowFrameDef.getWindowType() == WindowType.ROWS && + windowFrameDef.getEnd().isCurrentRow()); + VectorPTFEvaluatorBase evaluator; switch (functionType) { case ROW_NUMBER: - evaluator = new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); break; case RANK: - evaluator = new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); break; case DENSE_RANK: - evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); break; case MIN: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? 
+ new VectorPTFEvaluatorDecimalMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -167,13 +199,25 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function case MAX: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -182,13 +226,25 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function case SUM: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -197,13 +253,25 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function case AVG: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? 
+ new VectorPTFEvaluatorLongAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index 808d8c8..3fa2655 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -563,7 +563,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1616,7 +1616,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1792,7 +1792,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2029,7 +2029,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3812,7 +3812,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 5 - 
Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index e16f843..9f49f2e 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -659,7 +659,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -1381,7 +1381,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2106,7 +2106,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2781,16 +2781,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2829,13 +2841,32 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double, col 2:double, col 2:double, col 2:double] + functionNames: [sum, min, max, 
avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0:string] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3439,16 +3470,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -3487,13 +3530,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double, col 2:double, col 2:double, col 2:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] 
Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4100,16 +4163,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double, bigint] Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 3] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -4148,13 +4223,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 3:double, col 3:double, col 3:double, col 3:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 7, 2, 1, 3] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [ConstantVectorExpression(val 0) -> 8:int] + streamingColumns: [4, 5, 6, 7] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 3, 4, 5, 6, 7] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 6a132b8..cf6af00 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -68,16 +68,28 
@@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -112,13 +124,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -938,7 +971,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1139,7 +1172,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame 
is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1443,7 +1476,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1786,16 +1819,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1830,13 +1875,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1962,16 +2028,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2006,13 +2084,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5422,7 +5521,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reduce Operator Tree: Select Operator @@ -8349,16 +8448,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -8385,15 +8496,38 @@ STAGE PLANS: name: min window function: GenericUDAFMinEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin] + functionInputExpressions: [col 3:double, col 3:double] + functionNames: [sum, min] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 0:string, col 1:string] + outputColumns: [4, 5, 1, 0, 2, 3] + outputTypes: [double, double, string, string, int, double] + streamingColumns: [4, 5] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sum_window_0 (type: double), min_window_1 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) outputColumnNames: sum_window_0, min_window_1, _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 5, 1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 1] + valueColumnNums: [4, 5, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: double), min_window_1 (type: double), _col5 (type: int), _col7 (type: double) Reducer 3 @@ -8777,16 +8911,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -8807,13 +8953,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 3:double] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 1, 0, 2, 3] + outputTypes: [double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 2) -> 5:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9663,16 +9830,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [string, string] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, string] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int) outputColumnNames: _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -9693,13 +9872,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 1:int] + functionNames: [sum] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:int] + outputColumns: [2, 1] + outputTypes: 
[bigint, int] + partitionExpressions: [ConstantVectorExpression(val Manufacturer#6) -> 3:string] + streamingColumns: [2] Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'Manufacturer#6' (type: string), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 2] + selectExpressions: ConstantVectorExpression(val Manufacturer#6) -> 4:string Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out index 2bb7730..5ea866b 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -307,16 +307,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col4:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -344,13 +356,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:double, col 1:double] + functionNames: [rank, sum] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:double] + outputColumns: [3, 4, 0, 2, 1] + outputTypes: [int, double, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4] Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + selectExpressions: DoubleColSubtractDoubleScalar(col 4:double, val 5.0) -> 5:double Statistics: Num rows: 26 
Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1538,16 +1571,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: avg UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) outputColumnNames: _col2, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1568,13 +1613,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double] + functionNames: [avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string, col 0:string] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3] Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), avg_window_0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out index 91b52e7..7b6fa66 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -113,16 +113,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aza + 
reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, KEY.reducesinkkey2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col2, _col3, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -143,16 +155,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 2:bigint] + functionNames: [sum] + keyInputColumns: [0, 2, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:bigint] + outputColumns: [3, 0, 2, 1] + outputTypes: [bigint, int, bigint, string] + partitionExpressions: [col 0:int] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), _col7 (type: string), _col3 (type: bigint), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -245,16 +280,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: ++- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:double, KEY.reducesinkkey1:string, KEY.reducesinkkey2:float + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) outputColumnNames: _col4, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 0, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -275,16 +322,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + 
functionInputExpressions: [col 2:float] + functionNames: [sum] + keyInputColumns: [2, 0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:float] + outputColumns: [3, 2, 0, 1] + outputTypes: [double, float, double, string] + partitionExpressions: [col 0:double] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: double), _col7 (type: string), _col4 (type: float), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out index 93b8655..584453c 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out @@ -105,16 +105,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, KEY.reducesinkkey2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col2, _col3, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -135,16 +147,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 2:bigint] + functionNames: [sum] + keyInputColumns: [0, 2, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:bigint] + outputColumns: [3, 0, 2, 1] + outputTypes: [bigint, int, bigint, string] + partitionExpressions: [col 0:int] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [1, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -327,16 +362,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:double, KEY.reducesinkkey1:string, KEY.reducesinkkey2:float + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) outputColumnNames: _col4, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 0, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -357,16 +404,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 2:float] + functionNames: [sum] + keyInputColumns: [2, 0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:float] + outputColumns: [3, 2, 0, 1] + outputTypes: [double, float, double, string] + partitionExpressions: [col 0:double] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), sum_window_0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index cc6631e..b6b6cc2 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -212,16 +212,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 
(type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -256,13 +268,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -846,16 +879,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, 
VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -890,13 +935,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2141,16 +2207,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2185,13 +2263,34 @@ STAGE PLANS: name: sum window function: 
GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2356,16 +2455,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2400,13 +2511,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num 
rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2637,16 +2769,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2681,13 +2825,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output 
Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3152,7 +3317,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -3889,7 +4054,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4784,16 +4949,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -4828,13 +5005,33 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 1:string, col 1:string, col 2:int] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 4, 5, 1, 0, 2] + outputTypes: [int, int, bigint, string, string, int] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4, 5] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: 
[0, 1, 3, 4, 2, 5] Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/perf/spark/query51.q.out ql/src/test/results/clientpositive/perf/spark/query51.q.out index 78d164b..574fe68 100644 --- ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -412,15 +412,21 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(27,2)), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(27,2)) Reducer 4 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: max UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4, 5, 6, 7] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -447,17 +453,38 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDecimalMax, VectorPTFEvaluatorStreamingDecimalMax] + functionInputExpressions: [col 7:decimal(27,2), col 4:decimal(27,2)] + functionNames: [max, max] + native: true + orderExpressions: [IfExprStringGroupColumnStringGroupColumn(col 10:boolean, col 6:string, col 6:string)(children: IsNotNull(col 6:string) -> 10:boolean) -> 12:string] + partitionExpressions: [IfExprLongColumnLongColumn(col 10:boolean, col 5:int, col 2:int)(children: IsNotNull(col 5:int) -> 10:boolean) -> 11:int] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColGreaterDecimalColumn(col 8:decimal(27,2), col 9:decimal(27,2)) predicate: (max_window_0 > max_window_1) (type: boolean) Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: string), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 14, 7, 4, 8, 9] + selectExpressions: 
IfExprLongColumnLongColumn(col 10:boolean, col 5:int, col 2:int)(children: IsNotNull(col 5:int) -> 10:boolean) -> 13:int, IfExprStringGroupColumnStringGroupColumn(col 10:boolean, col 6:string, col 6:string)(children: IsNotNull(col 6:string) -> 10:boolean) -> 14:string Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) diff --git ql/src/test/results/clientpositive/perf/tez/query51.q.out ql/src/test/results/clientpositive/perf/tez/query51.q.out index 9d3afc1..d3b2fcb 100644 --- ql/src/test/results/clientpositive/perf/tez/query51.q.out +++ ql/src/test/results/clientpositive/perf/tez/query51.q.out @@ -365,15 +365,21 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(27,2)), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(27,2)) Reducer 4 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: max UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3, 4, 5, 6, 7] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -400,17 +406,38 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDecimalMax, VectorPTFEvaluatorStreamingDecimalMax] + functionInputExpressions: [col 7:decimal(27,2), col 4:decimal(27,2)] + functionNames: [max, max] + native: true + orderExpressions: [IfExprStringGroupColumnStringGroupColumn(col 10:boolean, col 6:string, col 6:string)(children: IsNotNull(col 6:string) -> 10:boolean) -> 12:string] + partitionExpressions: [IfExprLongColumnLongColumn(col 10:boolean, col 5:int, col 2:int)(children: IsNotNull(col 5:int) -> 10:boolean) -> 11:int] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterDecimalColGreaterDecimalColumn(col 8:decimal(27,2), col 9:decimal(27,2)) 
predicate: (max_window_0 > max_window_1) (type: boolean) Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: string), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13, 14, 7, 4, 8, 9] + selectExpressions: IfExprLongColumnLongColumn(col 10:boolean, col 5:int, col 2:int)(children: IsNotNull(col 5:int) -> 10:boolean) -> 13:int, IfExprStringGroupColumnStringGroupColumn(col 10:boolean, col 6:string, col 6:string)(children: IsNotNull(col 6:string) -> 10:boolean) -> 14:string Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out index 2017923..62d0942 100644 --- ql/src/test/results/clientpositive/spark/ptf.q.out +++ ql/src/test/results/clientpositive/spark/ptf.q.out @@ -69,6 +69,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -547,6 +548,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1572,6 +1574,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1744,6 +1747,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1976,6 +1980,7 @@ 
STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2916,6 +2921,7 @@ STAGE PLANS: value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3672,6 +3678,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int) Reducer 5 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index f82e248..a133aad 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -209,15 +209,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -252,13 +265,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), 
round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -835,15 +869,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -878,13 +925,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2112,15 
+2180,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2155,13 +2236,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2323,15 +2425,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: 
KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2366,13 +2481,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2599,15 +2735,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2642,13 +2791,34 @@ STAGE PLANS: name: sum 
window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3108,7 +3278,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -3792,15 +3962,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 3 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -3835,13 +4018,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + 
functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4659,15 +4863,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int) Reducer 5 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -4702,13 +4919,33 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 1:string, col 1:string, col 2:int] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 4, 5, 1, 0, 2] + outputTypes: [int, int, bigint, string, string, int] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4, 5] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1, 3, 4, 2, 5] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
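
Editor's note on the plan changes above: the updated golden files replace the old fallback reason ("PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type") with vectorized reducers whose evaluatorClasses list VectorPTFEvaluatorStreamingLongSum or VectorPTFEvaluatorStreamingDoubleSum, and whose window frame prints as ROWS PRECEDING(MAX)~CURRENT. The reason this frame can be streamed is that every row's result is the sum of all earlier rows in the group plus itself, so the result column (listed under streamingColumns) can be emitted row by row without buffering the group. Below is a minimal, self-contained Java sketch of that idea over a plain long[] standing in for one sorted PTF group; it is illustrative only, not the Hive evaluator source.

import java.util.Arrays;

public class StreamingLongSumSketch {
    public static void main(String[] args) {
        // One already-sorted PTF group (a single partition-key value).
        long[] group = {3L, 1L, 4L, 1L, 5L};
        long sum = 0L;
        long[] running = new long[group.length];
        for (int i = 0; i < group.length; i++) {
            // ROWS PRECEDING(MAX)~CURRENT: each row sees the sum of all
            // earlier rows plus itself, so the value is final as soon as
            // the row is processed -- no need to buffer the whole group.
            sum += group[i];
            running[i] = sum;
        }
        System.out.println(Arrays.toString(running)); // [3, 4, 8, 9, 14]
    }
}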
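The query51.q.out hunks (spark and tez) show the same pattern for max: two VectorPTFEvaluatorStreamingDecimalMax instances produce max_window_0 and max_window_1, which a native VectorFilterOperator then compares directly (FilterDecimalColGreaterDecimalColumn). A streaming max must also handle null inputs, which leave the running maximum unchanged. The sketch below uses java.math.BigDecimal in place of HiveDecimalWritable so it runs without Hive on the classpath; the null-as-"no value seen yet" seeding is an assumption made for illustration, not a transcription of the Hive class.

import java.math.BigDecimal;

public class StreamingDecimalMaxSketch {
    public static void main(String[] args) {
        BigDecimal[] group = {
            new BigDecimal("12.50"), new BigDecimal("7.25"),
            null, new BigDecimal("19.00")};
        BigDecimal max = null; // null: no non-null value seen yet
        for (BigDecimal value : group) {
            if (value != null && (max == null || value.compareTo(max) > 0)) {
                max = value;
            }
            // Each row receives the max over the frame ending at that row;
            // a null input row leaves the running max unchanged.
            System.out.println(max); // 12.50, 12.50, 12.50, 19.00
        }
    }
}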
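Several of the vectorized_ptf plans pair the streaming sum with VectorPTFEvaluatorRank and VectorPTFEvaluatorDenseRank in the same evaluatorClasses list, with all three result columns under streamingColumns. Rows reach the PTF operator already sorted by the orderExpressions, which is what makes dense_rank streamable: it needs only the previous order key and a counter. A minimal sketch under that assumption (again illustrative, not the Hive implementation):

public class StreamingDenseRankSketch {
    public static void main(String[] args) {
        // Order-by keys arrive sorted within the partition.
        String[] orderKeys = {"a", "a", "b", "c", "c"};
        int denseRank = 0;
        String prev = null;
        for (String key : orderKeys) {
            // Bump the rank only when the order key changes; ties share it.
            if (prev == null || !key.equals(prev)) {
                denseRank++;
                prev = key;
            }
            System.out.println(key + " -> " + denseRank); // a->1 a->1 b->2 c->3 c->3
        }
    }
}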