diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
index 785725c..437c319 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
@@ -73,14 +73,19 @@ public void evaluateInputExpr(VectorizedRowBatch batch) throws HiveException {
   }
 
   // Evaluate the aggregation over one of the group's batches.
-  public abstract void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) throws HiveException;
+  public abstract void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException;
 
-  // Returns true if the aggregation result will be streamed.
-  public boolean streamsResult() {
-    // Assume it is not streaming by default.
-    return false;
+  // Do any work necessary after the last batch for a group has been processed.  Necessary
+  // for both streaming and non-streaming evaluators.
+  public void doLastBatchWork() {
+    // By default, do nothing.
   }
 
+  // Returns true if the aggregation result will be streamed.
+  // Otherwise, we must evaluate the whole group before producing a result.
+  public abstract boolean streamsResult();
+
   public int getOutputColumnNum() {
     return outputColumnNum;
   }
@@ -88,7 +93,7 @@ public int getOutputColumnNum() {
   // After processing all the group's batches with evaluateGroupBatch, is the non-streaming
   // aggregation result null?
   public boolean isGroupResultNull() {
-    return false;
+    throw new RuntimeException("Not implemented");
   }
 
   // What is the ColumnVector type of the aggregation result?
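The interface change above moves group-end bookkeeping out of evaluateGroupBatch (which previously received an isLastGroupBatch flag) and into the new doLastBatchWork() hook, with streamsResult() now abstract. A minimal sketch of how a caller might drive the revised lifecycle — the driver class and its wiring are illustrative assumptions, not the actual Hive operator code:

// Sketch only: drives one group through an evaluator under the new contract.
// DriverSketch is hypothetical; in Hive the real caller is the vectorized
// PTF operator, which is not part of this patch excerpt.
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorBase;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class DriverSketch {
  void processGroup(VectorPTFEvaluatorBase evaluator,
      Iterable<VectorizedRowBatch> groupBatches) throws HiveException {
    for (VectorizedRowBatch batch : groupBatches) {
      // Old contract: evaluateGroupBatch(batch, isLastGroupBatch).
      evaluator.evaluateGroupBatch(batch);
    }
    // New contract: group-end work is an explicit call after the last batch.
    evaluator.doLastBatchWork();
    if (!evaluator.streamsResult() && !evaluator.isGroupResultNull()) {
      // Non-streaming evaluators expose one group result to copy out.
      int outputColumn = evaluator.getOutputColumnNum();
      // ... copy the group result into outputColumn for every group row ...
    }
  }
}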
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java
index 9409c80..77b9892 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java
@@ -42,7 +42,8 @@ public VectorPTFEvaluatorCount(WindowFrameDef windowFrameDef, VectorExpression i
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -83,6 +84,12 @@ public void evaluateGroupBatch(VectorizedRowBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return false;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java
index 9f9c04a..e44b614 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCountStar.java
@@ -38,7 +38,8 @@ public VectorPTFEvaluatorCountStar(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch) {
 
     // No input expression for COUNT(*).
    // evaluateInputExpr(batch);
@@ -48,6 +49,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return false;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java
index 4541843..85281c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java
@@ -51,7 +51,8 @@ public VectorPTFEvaluatorDecimalAvg(WindowFrameDef windowFrameDef, VectorExpress
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -124,17 +125,24 @@ public void evaluateGroupBatc
         }
       }
     }
+  }
 
-    if (isLastGroupBatch) {
-      if (!isGroupResultNull) {
-        avg.set(sum);
-        temp.setFromLong(nonNullGroupCount);
-        avg.mutateDivide(temp);
-      }
+  @Override
+  public void doLastBatchWork() {
+    if (!isGroupResultNull) {
+      avg.set(sum);
+      temp.setFromLong(nonNullGroupCount);
+      avg.mutateDivide(temp);
     }
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java
index c36fb77..078e56a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java
@@ -48,7 +48,8 @@ public VectorPTFEvaluatorDecimalFirstValue(WindowFrameDef windowFrameDef,
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -98,6 +99,7 @@ public void evaluateGroupBatc
     }
   }
 
+  @Override
   public boolean streamsResult() {
     return true;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java
index 380ce60..6f97111 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java
@@ -47,7 +47,8 @@ public VectorPTFEvaluatorDecimalLastValue(WindowFrameDef windowFrameDef,
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -57,9 +58,6 @@ public void evaluateGroupBatc
     // We do not filter when PTF is in reducer.
     Preconditions.checkState(!batch.selectedInUse);
 
-    if (!isLastGroupBatch) {
-      return;
-    }
     final int size = batch.size;
     if (size == 0) {
       return;
@@ -88,6 +86,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java
index 46ee261..f66deb6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java
@@ -44,7 +44,8 @@ public VectorPTFEvaluatorDecimalMax(WindowFrameDef windowFrameDef, VectorExpress
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -97,15 +98,15 @@ public void evaluateGroupBatc
         return;
       }
     }
+    HiveDecimalWritable[] vector = decimalColVector.vector;
+
+    final HiveDecimalWritable firstValue = vector[i++];
     if (isGroupResultNull) {
-      max.set(vector[i++]);
+      max.set(firstValue);
       isGroupResultNull = false;
-    } else {
-      final HiveDecimalWritable dec = vector[i++];
-      if (dec.compareTo(max) == 1) {
-        max.set(dec);
-      }
+    } else if (firstValue.compareTo(max) == 1) {
+      max.set(firstValue);
     }
     for (; i < size; i++) {
       if (!batchIsNull[i]) {
@@ -119,6 +120,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
@@ -133,11 +140,9 @@ public HiveDecimalWritable getDecimalGroupResult() {
     return max;
   }
 
-  private static HiveDecimal MIN_VALUE = HiveDecimal.create("-99999999999999999999999999999999999999");
-
   @Override
   public void resetEvaluator() {
     isGroupResultNull = true;
-    max.set(MIN_VALUE);
+    max.setFromLong(0);
   }
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java
index f881356..9f5a89a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java
@@ -44,7 +44,7 @@ public VectorPTFEvaluatorDecimalMin(WindowFrameDef windowFrameDef, VectorExpress
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -98,14 +98,13 @@ public void evaluateGroupBatc
       }
     }
     HiveDecimalWritable[] vector = decimalColVector.vector;
+
+    final HiveDecimalWritable firstValue = vector[i++];
     if (isGroupResultNull) {
-      min.set(vector[i++]);
+      min.set(firstValue);
       isGroupResultNull = false;
-    } else {
-      final HiveDecimalWritable dec = vector[i++];
-      if (dec.compareTo(min) == -1) {
-        min.set(dec);
-      }
+    } else if (firstValue.compareTo(min) == -1) {
+      min.set(firstValue);
     }
     for (; i < size; i++) {
       if (!batchIsNull[i]) {
@@ -119,6 +118,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
@@ -133,11 +138,9 @@ public HiveDecimalWritable getDecimalGroupResult() {
     return min;
   }
 
-  private static HiveDecimal MAX_VALUE = HiveDecimal.create("99999999999999999999999999999999999999");
-
   @Override
   public void resetEvaluator() {
     isGroupResultNull = true;
-    min.set(MAX_VALUE);
+    min.setFromLong(0);
   }
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java
index 4b31dc4..93d8ed5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java
@@ -46,7 +46,8 @@ public VectorPTFEvaluatorDecimalSum(WindowFrameDef windowFrameDef, VectorExpress
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -115,6 +116,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
index 5025171..cb6b586 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -50,12 +51,14 @@ public void evaluateGroupBatc
     longColVector.isRepeating = true;
     longColVector.isNull[0] = false;
     longColVector.vector[0] = denseRank;
+  }
 
-    if (isLastGroupBatch) {
-      denseRank++;
-    }
+  @Override
+  public void doLastBatchWork() {
+    denseRank++;
   }
 
+  @Override
   public boolean streamsResult() {
     // No group value.
     return true;
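dense_rank above shows the split cleanly: every row of the current group sees the same output value, and doLastBatchWork() advances the counter once the group is complete (rank, later in this patch, advances by the group's row count instead). A standalone illustration of the two counters — plain Java, not Hive code:

import java.util.List;

// Illustrates the counters that doLastBatchWork() now advances: dense_rank
// advances by one per peer group, rank by the group's row count.
public class RankCountersSketch {
  public static void main(String[] args) {
    long rank = 1;
    long denseRank = 1;
    for (int groupCount : List.of(3, 1, 2)) {   // three peer-group sizes
      System.out.printf("group of %d rows: rank=%d dense_rank=%d%n",
          groupCount, rank, denseRank);
      rank += groupCount;  // mirrors VectorPTFEvaluatorRank.doLastBatchWork()
      denseRank++;         // mirrors VectorPTFEvaluatorDenseRank.doLastBatchWork()
    }
  }
}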
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java
index 224177a..e20a562 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java
@@ -45,7 +45,8 @@ public VectorPTFEvaluatorDoubleAvg(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -117,15 +118,22 @@ public void evaluateGroupBatc
         sum += varSum;
       }
     }
+  }
 
-    if (isLastGroupBatch) {
-      if (!isGroupResultNull) {
-        avg = sum / nonNullGroupCount;
-      }
+  @Override
+  public void doLastBatchWork() {
+    if (!isGroupResultNull) {
+      avg = sum / nonNullGroupCount;
     }
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java
index d20d10c..26bd083 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java
@@ -45,7 +45,8 @@ public VectorPTFEvaluatorDoubleFirstValue(WindowFrameDef windowFrameDef,
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -95,6 +96,7 @@ public void evaluateGroupBatc
     }
   }
 
+  @Override
   public boolean streamsResult() {
     return true;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java
index 83a8e33..9986e9a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java
@@ -43,7 +43,8 @@ public VectorPTFEvaluatorDoubleLastValue(WindowFrameDef windowFrameDef,
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -53,9 +54,6 @@ public void evaluateGroupBatc
     // We do not filter when PTF is in reducer.
     Preconditions.checkState(!batch.selectedInUse);
 
-    if (!isLastGroupBatch) {
-      return;
-    }
     final int size = batch.size;
     if (size == 0) {
       return;
@@ -84,6 +82,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java
index 50280d9..8c8e8ad 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorDoubleMax(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -64,7 +65,7 @@ public void evaluateGroupBatc
       isGroupResultNull = false;
     } else {
       final double repeatedMax = doubleColVector.vector[0];
-      if (repeatedMax < max) {
+      if (repeatedMax > max) {
         max = repeatedMax;
       }
     }
@@ -112,6 +113,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
@@ -129,6 +136,6 @@ public double getDoubleGroupResult() {
   @Override
   public void resetEvaluator() {
     isGroupResultNull = true;
-    max = Double.MIN_VALUE;
+    max = 0.0;
   }
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java
index 24788af..87d8757 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorDoubleMin(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -112,6 +113,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
@@ -129,6 +136,6 @@ public double getDoubleGroupResult() {
   @Override
   public void resetEvaluator() {
     isGroupResultNull = true;
-    min = Double.MAX_VALUE;
+    min = 0.0;
   }
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java
index 902d81e..85a77c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorDoubleSum(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -109,6 +110,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java
index e2d1768..4b525bf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java
@@ -45,7 +45,8 @@ public VectorPTFEvaluatorLongAvg(WindowFrameDef windowFrameDef, VectorExpression
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -117,15 +118,22 @@ public void evaluateGroupBatc
         sum += varSum;
       }
     }
+  }
 
-    if (isLastGroupBatch) {
-      if (!isGroupResultNull) {
-        avg = ((double) sum) / nonNullGroupCount;
-      }
+  @Override
+  public void doLastBatchWork() {
+    if (!isGroupResultNull) {
+      avg = ((double) sum) / nonNullGroupCount;
     }
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java
index 37323fe..fa497ee 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java
@@ -45,7 +45,8 @@ public VectorPTFEvaluatorLongFirstValue(WindowFrameDef windowFrameDef,
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -95,6 +96,7 @@ public void evaluateGroupBatc
     }
   }
 
+  @Override
   public boolean streamsResult() {
     return true;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java
index 925841b..fe768cc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java
@@ -44,7 +44,8 @@ public VectorPTFEvaluatorLongLastValue(WindowFrameDef windowFrameDef,
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -54,9 +55,6 @@ public void evaluateGroupBatc
     // We do not filter when PTF is in reducer.
     Preconditions.checkState(!batch.selectedInUse);
 
-    if (!isLastGroupBatch) {
-      return;
-    }
     final int size = batch.size;
     if (size == 0) {
       return;
@@ -85,6 +83,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java
index 638f1b7..87a6431 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorLongMax(WindowFrameDef windowFrameDef, VectorExpression
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
    evaluateInputExpr(batch);
@@ -112,6 +113,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java
index 6238a03..9192b5b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorLongMin(WindowFrameDef windowFrameDef, VectorExpression
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -112,6 +113,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java
index afd3952..8c67d24 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java
@@ -41,7 +41,8 @@ public VectorPTFEvaluatorLongSum(WindowFrameDef windowFrameDef, VectorExpression
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -109,6 +110,12 @@ public void evaluateGroupBatc
   }
 
   @Override
+  public boolean streamsResult() {
+    // We must evaluate the whole group before producing a result.
+    return false;
+  }
+
+  @Override
   public boolean isGroupResultNull() {
     return isGroupResultNull;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
index 9cbc816..d20c60c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
@@ -42,7 +42,8 @@ public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression in
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
       throws HiveException {
 
     evaluateInputExpr(batch);
@@ -56,13 +57,15 @@ public void evaluateGroupBatc
     longColVector.isNull[0] = false;
     longColVector.vector[0] = rank;
     groupCount += batch.size;
+  }
 
-    if (isLastGroupBatch) {
-      rank += groupCount;
-      groupCount = 0;
-    }
+  @Override
+  public void doLastBatchWork() {
+    rank += groupCount;
+    groupCount = 0;
   }
 
+  @Override
   public boolean streamsResult() {
     // No group value.
     return true;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java
index 94de1d7..384541c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRowNumber.java
@@ -40,7 +40,8 @@ public VectorPTFEvaluatorRowNumber(WindowFrameDef windowFrameDef, VectorExpressi
     resetEvaluator();
   }
 
-  public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch)
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
      throws HiveException {
 
     evaluateInputExpr(batch);
@@ -53,11 +54,13 @@ public void evaluateGroupBatc
     }
   }
 
+  @Override
   public boolean streamsResult() {
     // No group value.
     return true;
   }
 
+  @Override
   public boolean isGroupResultNull() {
     return false;
   }
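The remaining files in the patch are new: streaming counterparts of the avg/max/min/sum evaluators. Instead of holding one result until the group ends, they write a running aggregate into the output column for every row, carrying the latest value across null inputs. A self-contained model of that per-row output contract (distilled as an assumption from the code below, not Hive code):

// Models what a streaming avg evaluator writes per row: the aggregate over
// the group's rows seen so far; leading nulls stay NULL, later nulls repeat
// the previous running value.
public class RunningAvgSketch {
  public static void main(String[] args) {
    Double[] column = { null, 2.0, null, 4.0, 9.0 };
    double sum = 0.0;
    int nonNullCount = 0;
    for (int i = 0; i < column.length; i++) {
      if (column[i] != null) {
        sum += column[i];
        nonNullCount++;
      }
      String out = (nonNullCount == 0) ? "NULL" : String.valueOf(sum / nonNullCount);
      System.out.println("row " + i + ": " + out);  // NULL, 2.0, 2.0, 3.0, 5.0
    }
  }
}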
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalAvg.java
new file mode 100644
index 0000000..e51d1fc
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalAvg.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates streaming HiveDecimal avg() for a PTF group.
+ *
+ * Stream the average of non-null column values, outputting sum / non-null count as we go along.
+ */
+public class VectorPTFEvaluatorStreamingDecimalAvg extends VectorPTFEvaluatorBase {
+
+  protected boolean isNull;
+  protected HiveDecimalWritable sum;
+  private int nonNullGroupCount;
+  private HiveDecimalWritable temp;
+  private HiveDecimalWritable avg;
+
+  public VectorPTFEvaluatorStreamingDecimalAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    sum = new HiveDecimalWritable();
+    temp = new HiveDecimalWritable();
+    avg = new HiveDecimalWritable();
+    resetEvaluator();
+  }
+
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException {
+
+    evaluateInputExpr(batch);
+
+    // Sum all non-null decimal column values for avg; maintain isNull; output the running
+    // average sum / non-null count for each row.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
+
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    if (decimalColVector.isRepeating) {
+
+      if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
+
+        // We have a repeated value.
+        isNull = false;
+        HiveDecimalWritable repeatedValue = decimalColVector.vector[0];
+
+        for (int i = 0; i < size; i++) {
+          sum.mutateAdd(repeatedValue);
+          nonNullGroupCount++;
+
+          // Output row i AVG.
+          avg.set(sum);
+          temp.setFromLong(nonNullGroupCount);
+          avg.mutateDivide(temp);
+          outputColVector.set(i, avg);
+        }
+      } else {
+        if (isNull) {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous AVG.
+          outputColVector.set(0, avg);
+        }
+        outputColVector.isRepeating = true;
+      }
+    } else if (decimalColVector.noNulls) {
+      isNull = false;
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+      for (int i = 0; i < size; i++) {
+        sum.mutateAdd(vector[i]);
+        nonNullGroupCount++;
+
+        // Output row i AVG.
+        avg.set(sum);
+        temp.setFromLong(nonNullGroupCount);
+        avg.mutateDivide(temp);
+        outputColVector.set(i, avg);
+      }
+    } else {
+      boolean[] batchIsNull = decimalColVector.isNull;
+      int i = 0;
+      while (batchIsNull[i]) {
+        if (isNull) {
+          outputColVector.isNull[i] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous AVG.
+          outputColVector.set(i, avg);
+        }
+        if (++i >= size) {
+          return;
+        }
+      }
+
+      isNull = false;
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+
+      sum.mutateAdd(vector[i]);
+      nonNullGroupCount++;
+
+      // Output row i AVG.
+      avg.set(sum);
+      temp.setFromLong(nonNullGroupCount);
+      avg.mutateDivide(temp);
+
+      outputColVector.set(i++, avg);
+
+      for (; i < size; i++) {
+        if (!batchIsNull[i]) {
+          sum.mutateAdd(vector[i]);
+          nonNullGroupCount++;
+
+          avg.set(sum);
+          temp.setFromLong(nonNullGroupCount);
+          avg.mutateDivide(temp);
+
+          // Output row i AVG.
+          outputColVector.set(i, avg);
+        } else {
+
+          // Continue previous AVG.
+          outputColVector.set(i, avg);
+        }
+      }
+    }
+  }
+
+  @Override
+  public boolean streamsResult() {
+    // No group value.
+    return true;
+  }
+
+  @Override
+  public Type getResultColumnVectorType() {
+    return Type.DECIMAL;
+  }
+
+  @Override
+  public void resetEvaluator() {
+    isNull = true;
+    sum.set(HiveDecimal.ZERO);
+    nonNullGroupCount = 0;
+    avg.set(HiveDecimal.ZERO);
+  }
+}
\ No newline at end of file
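Each streaming evaluator repeats the same prologue when a batch starts with nulls: walk the leading null run, emitting NULL while the group has produced no value yet and repeating the running result once it has, then peel off the first non-null value so the main loop can assume a current result exists. Restated in isolation (a simplified assumption, not the Hive code itself):

// Simplified restatement of the leading-null prologue. Returns the index of
// the first non-null row, or -1 if the whole batch is null; fills `out` for
// the null prefix (null = SQL NULL, otherwise the carried running result).
public class NullPrologueSketch {
  static int skipLeadingNulls(boolean[] batchIsNull, int size,
      Double runningResult, Double[] out) {
    int i = 0;
    while (batchIsNull[i]) {
      out[i] = runningResult;      // null until the group's first real value
      if (++i >= size) {
        return -1;
      }
    }
    return i;
  }

  public static void main(String[] args) {
    Double[] out = new Double[4];
    boolean[] isNull = { true, true, false, true };
    System.out.println(skipLeadingNulls(isNull, 4, null, out));  // prints 2
  }
}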
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMax.java
new file mode 100644
index 0000000..9357242
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMax.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates streaming HiveDecimal max() for a PTF group.
+ */
+public class VectorPTFEvaluatorStreamingDecimalMax extends VectorPTFEvaluatorBase {
+
+  protected boolean isNull;
+  protected HiveDecimalWritable max;
+
+  public VectorPTFEvaluatorStreamingDecimalMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    max = new HiveDecimalWritable();
+    resetEvaluator();
+  }
+
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException {
+
+    evaluateInputExpr(batch);
+
+    // Determine maximum of all non-null decimal column values; maintain isNull.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
+
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    if (decimalColVector.isRepeating) {
+
+      if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
+
+        HiveDecimalWritable repeatedMax = decimalColVector.vector[0];
+        if (isNull) {
+          max.set(repeatedMax);
+          isNull = false;
+        } else if (repeatedMax.compareTo(max) == 1) {
+          max.set(repeatedMax);
+        }
+        outputColVector.set(0, max);
+      } else if (isNull) {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      } else {
+
+        // Continue previous MAX.
+        outputColVector.set(0, max);
+      }
+      outputColVector.isRepeating = true;
+    } else if (decimalColVector.noNulls) {
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+      for (int i = 0; i < size; i++) {
+        final HiveDecimalWritable value = vector[i];
+        if (isNull) {
+          max.set(value);
+          isNull = false;
+        } else if (value.compareTo(max) == 1) {
+          max.set(value);
+        }
+        outputColVector.set(i, max);
+      }
+    } else {
+      boolean[] batchIsNull = decimalColVector.isNull;
+      int i = 0;
+      while (batchIsNull[i]) {
+        if (isNull) {
+          outputColVector.isNull[i] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous MAX.
+          outputColVector.set(i, max);
+        }
+        if (++i >= size) {
+          return;
+        }
+      }
+
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+
+      final HiveDecimalWritable firstValue = vector[i];
+      if (isNull) {
+        max.set(firstValue);
+        isNull = false;
+      } else if (firstValue.compareTo(max) == 1) {
+        max.set(firstValue);
+      }
+
+      outputColVector.set(i++, max);
+
+      for (; i < size; i++) {
+        if (!batchIsNull[i]) {
+          final HiveDecimalWritable value = vector[i];
+          if (isNull) {
+            max.set(value);
+            isNull = false;
+          } else if (value.compareTo(max) == 1) {
+            max.set(value);
+          }
+          outputColVector.set(i, max);
+        } else {
+
+          // Continue previous MAX.
+          outputColVector.set(i, max);
+        }
+      }
+    }
+  }
+
+  @Override
+  public boolean streamsResult() {
+    // No group value.
+    return true;
+  }
+
+  @Override
+  public Type getResultColumnVectorType() {
+    return Type.DECIMAL;
+  }
+
+  @Override
+  public void resetEvaluator() {
+    isNull = true;
+    max.set(HiveDecimal.ZERO);
+  }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMin.java
new file mode 100644
index 0000000..51b43d7
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalMin.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates streaming HiveDecimal min() for a PTF group.
+ */
+public class VectorPTFEvaluatorStreamingDecimalMin extends VectorPTFEvaluatorBase {
+
+  protected boolean isNull;
+  protected HiveDecimalWritable min;
+
+  public VectorPTFEvaluatorStreamingDecimalMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    min = new HiveDecimalWritable();
+    resetEvaluator();
+  }
+
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException {
+
+    evaluateInputExpr(batch);
+
+    // Determine minimum of all non-null decimal column values; maintain isNull.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
+
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    if (decimalColVector.isRepeating) {
+
+      if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
+
+        HiveDecimalWritable repeatedMin = decimalColVector.vector[0];
+        if (isNull) {
+          min.set(repeatedMin);
+          isNull = false;
+        } else if (repeatedMin.compareTo(min) == -1) {
+          min.set(repeatedMin);
+        }
+        outputColVector.set(0, min);
+      } else if (isNull) {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      } else {
+
+        // Continue previous MIN.
+        outputColVector.set(0, min);
+      }
+      outputColVector.isRepeating = true;
+    } else if (decimalColVector.noNulls) {
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+      for (int i = 0; i < size; i++) {
+        final HiveDecimalWritable value = vector[i];
+        if (isNull) {
+          min.set(value);
+          isNull = false;
+        } else if (value.compareTo(min) == -1) {
+          min.set(value);
+        }
+        outputColVector.set(i, min);
+      }
+    } else {
+      boolean[] batchIsNull = decimalColVector.isNull;
+      int i = 0;
+      while (batchIsNull[i]) {
+        if (isNull) {
+          outputColVector.isNull[i] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous MIN.
+          outputColVector.set(i, min);
+        }
+        if (++i >= size) {
+          return;
+        }
+      }
+
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+
+      final HiveDecimalWritable firstValue = vector[i];
+      if (isNull) {
+        min.set(firstValue);
+        isNull = false;
+      } else if (firstValue.compareTo(min) == -1) {
+        min.set(firstValue);
+      }
+
+      outputColVector.set(i++, min);
+
+      for (; i < size; i++) {
+        if (!batchIsNull[i]) {
+          final HiveDecimalWritable value = vector[i];
+          if (isNull) {
+            min.set(value);
+            isNull = false;
+          } else if (value.compareTo(min) == -1) {
+            min.set(value);
+          }
+          outputColVector.set(i, min);
+        } else {
+
+          // Continue previous MIN.
+          outputColVector.set(i, min);
+        }
+      }
+    }
+  }
+
+  @Override
+  public boolean streamsResult() {
+    // No group value.
+    return true;
+  }
+
+  @Override
+  public Type getResultColumnVectorType() {
+    return Type.DECIMAL;
+  }
+
+  @Override
+  public void resetEvaluator() {
+    isNull = true;
+    min.set(HiveDecimal.ZERO);
+  }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalSum.java
new file mode 100644
index 0000000..bc8620a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDecimalSum.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates streaming HiveDecimal sum() for a PTF group.
+ */
+public class VectorPTFEvaluatorStreamingDecimalSum extends VectorPTFEvaluatorBase {
+
+  protected boolean isNull;
+  protected HiveDecimalWritable sum;
+
+  public VectorPTFEvaluatorStreamingDecimalSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    sum = new HiveDecimalWritable();
+    resetEvaluator();
+  }
+
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException {
+
+    evaluateInputExpr(batch);
+
+    // Sum all non-null decimal column values; maintain isGroupResultNull.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
+
+    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
+
+    if (decimalColVector.isRepeating) {
+
+      if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
+
+        // We have a repeated value.
+        isNull = false;
+        HiveDecimalWritable repeatedValue = decimalColVector.vector[0];
+
+        for (int i = 0; i < size; i++) {
+          sum.mutateAdd(repeatedValue);
+
+          // Output row i SUM.
+          outputColVector.set(i, sum);
+        }
+      } else {
+        if (isNull) {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous SUM.
+          outputColVector.set(0, sum);
+        }
+        outputColVector.isRepeating = true;
+      }
+    } else if (decimalColVector.noNulls) {
+      isNull = false;
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+      for (int i = 0; i < size; i++) {
+        sum.mutateAdd(vector[i]);
+
+        // Output row i sum.
+        outputColVector.set(i, sum);
+      }
+    } else {
+      boolean[] batchIsNull = decimalColVector.isNull;
+      int i = 0;
+      while (batchIsNull[i]) {
+        if (isNull) {
+          outputColVector.isNull[i] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous SUM.
+          outputColVector.set(i, sum);
+        }
+        if (++i >= size) {
+          return;
+        }
+      }
+
+      isNull = false;
+      HiveDecimalWritable[] vector = decimalColVector.vector;
+
+      sum.mutateAdd(vector[i]);
+
+      // Output row i sum.
+      outputColVector.set(i++, sum);
+
+      for (; i < size; i++) {
+        if (!batchIsNull[i]) {
+          sum.mutateAdd(vector[i]);
+          outputColVector.set(i, sum);
+        } else {
+
+          // Continue previous SUM.
+          outputColVector.set(i, sum);
+        }
+      }
+    }
+  }
+
+  @Override
+  public boolean streamsResult() {
+    // No group value.
+    return true;
+  }
+
+  @Override
+  public Type getResultColumnVectorType() {
+    return Type.DECIMAL;
+  }
+
+  @Override
+  public void resetEvaluator() {
+    isNull = true;
+    sum.set(HiveDecimal.ZERO);
+  }
+}
\ No newline at end of file
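One design point worth noting before the double variants: for a repeating input batch, min/max can evaluate once and emit a single repeated output value, but the running sum and average above must still loop over every row, because each row's running value differs. A small contrast (illustrative only, assuming a 3-row batch repeating the value 5):

// Contrast: running max is constant across a repeated batch, running sum is not.
public class RepeatingBatchSketch {
  public static void main(String[] args) {
    final int size = 3;
    final double repeated = 5.0;
    double max = Double.NEGATIVE_INFINITY;
    double sum = 0.0;
    for (int i = 0; i < size; i++) {
      max = Math.max(max, repeated);  // 5, 5, 5 -> one repeated output suffices
      sum += repeated;                // 5, 10, 15 -> a distinct output per row
      System.out.printf("row %d: max=%.0f sum=%.0f%n", i, max, sum);
    }
  }
}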
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleAvg.java
new file mode 100644
index 0000000..f6c5942
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleAvg.java
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates streaming double avg() for a PTF group.
+ *
+ * Stream the average of non-null column values, outputting sum / non-null count as we go along.
+ */
+public class VectorPTFEvaluatorStreamingDoubleAvg extends VectorPTFEvaluatorBase {
+
+  protected boolean isNull;
+  protected double sum;
+  private int nonNullGroupCount;
+  protected double avg;
+
+  public VectorPTFEvaluatorStreamingDoubleAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    resetEvaluator();
+  }
+
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException {
+
+    evaluateInputExpr(batch);
+
+    // Sum all non-null double column values for avg; maintain isNull; output the running
+    // average sum / non-null count for each row.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
+
+    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+    double[] outputVector = outputColVector.vector;
+
+    if (doubleColVector.isRepeating) {
+
+      if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
+
+        // We have a repeated value.
+        isNull = false;
+        final double repeatedValue = doubleColVector.vector[0];
+
+        for (int i = 0; i < size; i++) {
+          sum += repeatedValue;
+          nonNullGroupCount++;
+
+          avg = sum / nonNullGroupCount;
+
+          // Output row i AVG.
+          outputVector[i] = avg;
+        }
+      } else {
+        if (isNull) {
+          outputColVector.isNull[0] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous AVG.
+          outputVector[0] = avg;
+        }
+        outputColVector.isRepeating = true;
+      }
+    } else if (doubleColVector.noNulls) {
+      isNull = false;
+      double[] vector = doubleColVector.vector;
+      for (int i = 0; i < size; i++) {
+        sum += vector[i];
+        nonNullGroupCount++;
+
+        avg = sum / nonNullGroupCount;
+
+        // Output row i AVG.
+        outputVector[i] = avg;
+      }
+    } else {
+      boolean[] batchIsNull = doubleColVector.isNull;
+      int i = 0;
+      while (batchIsNull[i]) {
+        if (isNull) {
+          outputColVector.isNull[i] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous AVG.
+          outputVector[i] = avg;
+        }
+        if (++i >= size) {
+          return;
+        }
+      }
+
+      isNull = false;
+      double[] vector = doubleColVector.vector;
+
+      sum += vector[i];
+      nonNullGroupCount++;
+
+      avg = sum / nonNullGroupCount;
+
+      // Output row i AVG.
+      outputVector[i++] = avg;
+
+      for (; i < size; i++) {
+        if (!batchIsNull[i]) {
+          sum += vector[i];
+          nonNullGroupCount++;
+
+          avg = sum / nonNullGroupCount;
+
+          // Output row i average.
+          outputVector[i] = avg;
+        } else {
+
+          // Continue previous AVG.
+          outputVector[i] = avg;
+        }
+      }
+    }
+  }
+
+  @Override
+  public boolean streamsResult() {
+    // No group value.
+    return true;
+  }
+
+  @Override
+  public Type getResultColumnVectorType() {
+    return Type.DOUBLE;
+  }
+
+  @Override
+  public void resetEvaluator() {
+    isNull = true;
+    sum = 0.0;
+    nonNullGroupCount = 0;
+    avg = 0.0;
+  }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMax.java
new file mode 100644
index 0000000..1d61cc5
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMax.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates streaming double max() for a PTF group.
+ */
+public class VectorPTFEvaluatorStreamingDoubleMax extends VectorPTFEvaluatorBase {
+
+  protected boolean isNull;
+  protected double max;
+
+  public VectorPTFEvaluatorStreamingDoubleMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+      int outputColumnNum) {
+    super(windowFrameDef, inputVecExpr, outputColumnNum);
+    resetEvaluator();
+  }
+
+  @Override
+  public void evaluateGroupBatch(VectorizedRowBatch batch)
+      throws HiveException {
+
+    evaluateInputExpr(batch);
+
+    // Determine maximum of all non-null double column values; maintain isNull.
+
+    // We do not filter when PTF is in reducer.
+    Preconditions.checkState(!batch.selectedInUse);
+
+    final int size = batch.size;
+    if (size == 0) {
+      return;
+    }
+    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
+
+    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+    double[] outputVector = outputColVector.vector;
+
+    if (doubleColVector.isRepeating) {
+
+      if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
+
+        // We have a repeated value but we only need to evaluate once for MIN/MAX.
+        final double repeatedMax = doubleColVector.vector[0];
+
+        if (isNull) {
+          max = repeatedMax;
+          isNull = false;
+        } else if (repeatedMax > max) {
+          max = repeatedMax;
+        }
+        outputVector[0] = max;
+      } else if (isNull) {
+        outputColVector.isNull[0] = true;
+        outputColVector.noNulls = false;
+      } else {
+
+        // Continue previous MAX.
+        outputVector[0] = max;
+      }
+      outputColVector.isRepeating = true;
+    } else if (doubleColVector.noNulls) {
+      double[] vector = doubleColVector.vector;
+      for (int i = 0; i < size; i++) {
+        final double value = vector[i];
+        if (isNull) {
+          max = value;
+          isNull = false;
+        } else if (value > max) {
+          max = value;
+        }
+        outputVector[i] = max;
+      }
+    } else {
+      boolean[] batchIsNull = doubleColVector.isNull;
+      int i = 0;
+      while (batchIsNull[i]) {
+        if (isNull) {
+          outputColVector.isNull[i] = true;
+          outputColVector.noNulls = false;
+        } else {
+
+          // Continue previous MAX.
+ outputVector[i] = max; + } + if (++i >= size) { + return; + } + } + + double[] vector = doubleColVector.vector; + + final double firstValue = vector[i]; + if (isNull) { + max = firstValue; + isNull = false; + } else if (firstValue > max) { + max = firstValue; + } + + // Output row i max. + outputVector[i++] = max; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMin.java new file mode 100644 index 0000000..9ac197d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleMin.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double min() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDoubleMin extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected double min; + + public VectorPTFEvaluatorStreamingDoubleMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine minimum of all non-null double column values; maintain isNull. + + // We do not filter when PTF is in reducer. 
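+ // With no row filter, batch.selected is not in use, so rows can be
+ // scanned directly as 0..size-1.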
+ Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final double repeatedMin = doubleColVector.vector[0]; + + if (isNull) { + min = repeatedMin; + isNull = false; + } else if (repeatedMin < min) { + min = repeatedMin; + } + outputVector[0] = min; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[0] = min; + } + outputColVector.isRepeating = true; + } else if (doubleColVector.noNulls) { + double[] vector = doubleColVector.vector; + for (int i = 0; i < size; i++) { + final double value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + outputVector[i] = min; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + if (++i >= size) { + return; + } + } + + double[] vector = doubleColVector.vector; + + final double firstValue = vector[i]; + if (isNull) { + min = firstValue; + isNull = false; + } else if (firstValue < min) { + min = firstValue; + } + + // Output row i min. + outputVector[i++] = min; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final double value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + + // Output row i min. + outputVector[i] = min; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.DOUBLE; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = 0.0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleSum.java new file mode 100644 index 0000000..8f17663 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingDoubleSum.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates double sum() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingDoubleSum extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected double sum; + + public VectorPTFEvaluatorStreamingDoubleSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); + + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; + double[] outputVector = outputColVector.vector; + + if (doubleColVector.isRepeating) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { + + // We have a repeated value. + isNull = false; + final double repeatedValue = doubleColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + + // Output row i SUM. + outputVector[i] = sum; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[0] = sum; + } + outputColVector.isRepeating = true; + } + } else if (doubleColVector.noNulls) { + isNull = false; + double[] vector = doubleColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + + // Output row i SUM. + outputVector[i] = sum; + } + } else { + boolean[] batchIsNull = doubleColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + if (++i >= size) { + return; + } + } + + isNull = false; + double[] vector = doubleColVector.vector; + + sum += vector[i]; + + // Output row i sum. + outputVector[i++] = sum; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. 
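+ // Each row's running sum is already written to the output column during
+ // evaluateGroupBatch, so no per-group result needs to be kept.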
+ return true;
+ }
+
+ @Override
+ public Type getResultColumnVectorType() {
+ return Type.DOUBLE;
+ }
+
+ @Override
+ public void resetEvaluator() {
+ isNull = true;
+ sum = 0.0;
+ }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java
new file mode 100644
index 0000000..78d543a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates long avg() for a PTF group.
+ *
+ * Sum up non-null column values; each row outputs the running average sum / non-null count.
+ */
+public class VectorPTFEvaluatorStreamingLongAvg extends VectorPTFEvaluatorBase {
+
+ protected boolean isNull;
+ protected long sum;
+ private int nonNullGroupCount;
+ protected double avg;
+
+ public VectorPTFEvaluatorStreamingLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
+ int outputColumnNum) {
+ super(windowFrameDef, inputVecExpr, outputColumnNum);
+ resetEvaluator();
+ }
+
+ @Override
+ public void evaluateGroupBatch(VectorizedRowBatch batch)
+ throws HiveException {
+
+ evaluateInputExpr(batch);
+
+ // Sum all non-null long column values for avg; maintain isNull; output the running
+ // average for each row as it is processed.
+
+ // We do not filter when PTF is in reducer.
+ Preconditions.checkState(!batch.selectedInUse);
+
+ final int size = batch.size;
+ if (size == 0) {
+ return;
+ }
+ LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]);
+
+ DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
+ double[] outputVector = outputColVector.vector;
+
+ if (longColVector.isRepeating) {
+
+ if (longColVector.noNulls || !longColVector.isNull[0]) {
+
+ // We have a repeated value.
+ isNull = false;
+ final long repeatedValue = longColVector.vector[0];
+
+ for (int i = 0; i < size; i++) {
+ sum += repeatedValue;
+ nonNullGroupCount++;
+
+ avg = ((double) sum) / nonNullGroupCount;
+
+ // Output row i AVG.
+ outputVector[i] = avg;
+ }
+ } else {
+ if (isNull) {
+ outputColVector.isNull[0] = true;
+ outputColVector.noNulls = false;
+ } else {
+
+ // Continue previous AVG.
+ outputVector[0] = avg;
+ }
+ outputColVector.isRepeating = true;
+ }
+ } else if (longColVector.noNulls) {
+ isNull = false;
+ long[] vector = longColVector.vector;
+ for (int i = 0; i < size; i++) {
+ sum += vector[i];
+ nonNullGroupCount++;
+
+ avg = ((double) sum) / nonNullGroupCount;
+
+ // Output row i AVG.
+ outputVector[i] = avg;
+ }
+ } else {
+ boolean[] batchIsNull = longColVector.isNull;
+ int i = 0;
+ while (batchIsNull[i]) {
+ if (isNull) {
+ outputColVector.isNull[i] = true;
+ outputColVector.noNulls = false;
+ } else {
+
+ // Continue previous AVG.
+ outputVector[i] = avg;
+ }
+ if (++i >= size) {
+ return;
+ }
+ }
+
+ isNull = false;
+ long[] vector = longColVector.vector;
+
+ sum += vector[i];
+ nonNullGroupCount++;
+
+ avg = ((double) sum) / nonNullGroupCount;
+
+ // Output row i AVG.
+ outputVector[i++] = avg;
+
+ for (; i < size; i++) {
+ if (!batchIsNull[i]) {
+ sum += vector[i];
+ nonNullGroupCount++;
+
+ avg = ((double) sum) / nonNullGroupCount;
+
+ // Output row i AVG.
+ outputVector[i] = avg;
+ } else {
+
+ // Continue previous AVG.
+ outputVector[i] = avg;
+ }
+ }
+ }
+ }
+
+ @Override
+ public boolean streamsResult() {
+ // No group value.
+ return true;
+ }
+
+ @Override
+ public Type getResultColumnVectorType() {
+ return Type.DOUBLE;
+ }
+
+ @Override
+ public void resetEvaluator() {
+ isNull = true;
+ sum = 0;
+ nonNullGroupCount = 0;
+ avg = 0;
+ }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java
new file mode 100644
index 0000000..94d19b3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMax.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.ptf;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class evaluates long max() for a PTF group.
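+ *
+ * The running maximum is written to the output column as each row is processed;
+ * for example, input values 3, 1, NULL, 7 produce output rows 3, 3, 3, 7
+ * (a NULL input re-emits the previous running maximum).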
+ */ +public class VectorPTFEvaluatorStreamingLongMax extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long max; + + public VectorPTFEvaluatorStreamingLongMax(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine maximum of all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final long repeatedMax = longColVector.vector[0]; + + if (isNull) { + max = repeatedMax; + isNull = false; + } else if (repeatedMax > max) { + max = repeatedMax; + } + outputVector[0] = max; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[0] = max; + } + outputColVector.isRepeating = true; + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + final long value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + if (++i >= size) { + return; + } + } + + long[] vector = longColVector.vector; + + final long firstValue = vector[i]; + if (isNull) { + max = firstValue; + isNull = false; + } else if (firstValue > max) { + max = firstValue; + } + + // Output row i max. + outputVector[i++] = max; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long value = vector[i]; + if (isNull) { + max = value; + isNull = false; + } else if (value > max) { + max = value; + } + outputVector[i] = max; + } else { + + // Continue previous MAX. + outputVector[i] = max; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + max = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java new file mode 100644 index 0000000..2d7caf3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongMin.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long min() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongMin extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long min; + + public VectorPTFEvaluatorStreamingLongMin(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Determine minimum of all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value but we only need to evaluate once for MIN/MAX. + final long repeatedMin = longColVector.vector[0]; + + if (isNull) { + min = repeatedMin; + isNull = false; + } else if (repeatedMin < min) { + min = repeatedMin; + } + outputVector[0] = min; + } else if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[0] = min; + } + outputColVector.isRepeating = true; + } else if (longColVector.noNulls) { + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + final long value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + outputVector[i] = min; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + if (++i >= size) { + return; + } + } + + long[] vector = longColVector.vector; + + final long firstValue = vector[i]; + if (isNull) { + min = firstValue; + isNull = false; + } else if (firstValue < min) { + min = firstValue; + } + + // Output row i min. 
+ outputVector[i++] = min; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + final long value = vector[i]; + if (isNull) { + min = value; + isNull = false; + } else if (value < min) { + min = value; + } + + // Output row i min. + outputVector[i] = min; + } else { + + // Continue previous MIN. + outputVector[i] = min; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + min = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java new file mode 100644 index 0000000..76bca6b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongSum.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long sum() for a PTF group. + */ +public class VectorPTFEvaluatorStreamingLongSum extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + + public VectorPTFEvaluatorStreamingLongSum(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { + super(windowFrameDef, inputVecExpr, outputColumnNum); + resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + + evaluateInputExpr(batch); + + // Sum all non-null long column values; maintain isNull. + + // We do not filter when PTF is in reducer. + Preconditions.checkState(!batch.selectedInUse); + + final int size = batch.size; + if (size == 0) { + return; + } + LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + + if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + + // We have a repeated value. 
+ isNull = false; + final long repeatedValue = longColVector.vector[0]; + + for (int i = 0; i < size; i++) { + sum += repeatedValue; + + // Output row i sum. + outputVector[i] = sum; + } + } else { + if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[0] = sum; + } + outputColVector.isRepeating = true; + } + } else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } + } else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { + if (isNull) { + outputColVector.isNull[i] = true; + outputColVector.noNulls = false; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + if (++i >= size) { + return; + } + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + + // Output row i sum. + outputVector[i++] = sum; + + for (; i < size; i++) { + if (!batchIsNull[i]) { + sum += vector[i]; + + // Output row i sum. + outputVector[i] = sum; + } else { + + // Continue previous SUM. + outputVector[i] = sum; + } + } + } + } + + @Override + public boolean streamsResult() { + // No group value. + return true; + } + + @Override + public Type getResultColumnVectorType() { + return Type.LONG; + } + + @Override + public void resetEvaluator() { + isNull = true; + sum = 0; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index ff89775..b0340d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -162,7 +162,10 @@ public void evaluateStreamingGroupBatch(VectorizedRowBatch batch, boolean isLast // Streaming evaluators fill in their results during the evaluate call. 
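+ // A minimal sketch of the resulting contract for one group (assuming its
+ // batches arrive in order):
+ //
+ //   for each batch b of the group:
+ //     evaluator.evaluateGroupBatch(b);  // streaming: results written into b
+ //   evaluator.doLastBatchWork();        // once, after the group's last batch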
for (VectorPTFEvaluatorBase evaluator : evaluators) {
- evaluator.evaluateGroupBatch(batch, isLastGroupBatch);
+ evaluator.evaluateGroupBatch(batch);
+ if (isLastGroupBatch) {
+ evaluator.doLastBatchWork();
+ }
}
}
@@ -170,7 +173,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc throws HiveException {
for (VectorPTFEvaluatorBase evaluator : evaluators) {
- evaluator.evaluateGroupBatch(batch, isLastGroupBatch);
+ evaluator.evaluateGroupBatch(batch);
+ if (isLastGroupBatch) {
+ evaluator.doLastBatchWork();
+ }
}
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 9bb104d..1956125 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -228,6 +228,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.mapred.InputFormat;
@@ -2823,6 +2824,18 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
return false;
}
+ List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
+ final boolean isSingleParameter =
+ (exprNodeDescList != null &&
+ exprNodeDescList.size() == 1);
+ final ExprNodeDesc singleExprNodeDesc =
+ (isSingleParameter ? exprNodeDescList.get(0) : null);
+ final TypeInfo singleTypeInfo =
+ (isSingleParameter ? singleExprNodeDesc.getTypeInfo() : null);
+ final PrimitiveCategory singlePrimitiveCategory =
+ (singleTypeInfo instanceof PrimitiveTypeInfo ?
+ ((PrimitiveTypeInfo) singleTypeInfo).getPrimitiveCategory() : null);
+
switch (windowFrameDef.getWindowType()) {
case RANGE:
if (!windowFrameDef.getEnd().isCurrentRow()) {
@@ -2831,15 +2844,25 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex }
break;
case ROWS:
- if (!windowFrameDef.isEndUnbounded()) {
- setOperatorIssue(functionName + " UNBOUNDED end frame is not supported for ROWS window type");
- return false;
+ {
+ boolean isRowEndCurrent =
+ (windowFrameDef.getEnd().isCurrentRow() &&
+ (supportedFunctionType == SupportedFunctionType.AVG ||
+ supportedFunctionType == SupportedFunctionType.MAX ||
+ supportedFunctionType == SupportedFunctionType.MIN ||
+ supportedFunctionType == SupportedFunctionType.SUM) &&
+ isSingleParameter &&
+ singlePrimitiveCategory != null);
+ if (!isRowEndCurrent && !windowFrameDef.isEndUnbounded()) {
+ setOperatorIssue(
+ functionName + " UNBOUNDED end frame is required for ROWS window type");
+ return false;
+ }
}
break;
default:
throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
}
- List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
return false;
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
index 830b8c8..53886fe 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
@@ -50,6 +50,19 @@ import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorLongSum;
import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRank;
import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorRowNumber;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalAvg;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMax;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalMin;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDecimalSum;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleAvg;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMax;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleMin;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingDoubleSum;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongAvg;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMax;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongMin;
+import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFEvaluatorStreamingLongSum;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -138,27 +151,46 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function WindowFrameDef windowFrameDef, Type columnVectorType, VectorExpression inputVectorExpression,
int outputColumnNum) {
+ final boolean isRowEndCurrent =
+ (windowFrameDef.getWindowType() == WindowType.ROWS &&
+ windowFrameDef.getEnd().isCurrentRow());
+
VectorPTFEvaluatorBase evaluator;
switch (functionType) {
case ROW_NUMBER:
- evaluator =
new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); break; case RANK: - evaluator = new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); break; case DENSE_RANK: - evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = + new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); break; case MIN: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalMin( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalMin( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -167,13 +199,25 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function case MAX: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalMax( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalMax( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -182,13 +226,25 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function case SUM: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? 
+ new VectorPTFEvaluatorLongSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDecimalSum( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalSum( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); @@ -197,13 +253,25 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function case AVG: switch (columnVectorType) { case LONG: - evaluator = new VectorPTFEvaluatorLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorLongAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingLongAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DOUBLE: - evaluator = new VectorPTFEvaluatorDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? + new VectorPTFEvaluatorDoubleAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDoubleAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; case DECIMAL: - evaluator = new VectorPTFEvaluatorDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = !isRowEndCurrent ? 
+ new VectorPTFEvaluatorDecimalAvg( + windowFrameDef, inputVectorExpression, outputColumnNum) : + new VectorPTFEvaluatorStreamingDecimalAvg( + windowFrameDef, inputVectorExpression, outputColumnNum); break; default: throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType); diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index 808d8c8..3fa2655 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -563,7 +563,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1616,7 +1616,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1792,7 +1792,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -2029,7 +2029,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3812,7 +3812,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index e16f843..9f49f2e 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -659,7 +659,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -1381,7 +1381,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2106,7 +2106,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: first_value UNBOUNDED end frame is required for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -2781,16 +2781,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2829,13 +2841,32 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double, col 2:double, col 2:double, col 2:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [1, 2] + orderExpressions: [col 0:string] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3439,16 +3470,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -3487,13 +3530,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double, col 2:double, col 2:double, col 2:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 4, 5, 6, 0, 1, 2] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4, 5, 6] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4100,16 +4163,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - 
notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double, double, double, bigint] Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 3] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -4148,13 +4223,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin, VectorPTFEvaluatorStreamingDoubleMax, VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 3:double, col 3:double, col 3:double, col 3:double] + functionNames: [sum, min, max, avg] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 7, 2, 1, 3] + outputTypes: [double, double, double, double, string, string, double] + partitionExpressions: [ConstantVectorExpression(val 0) -> 8:int] + streamingColumns: [4, 5, 6, 7] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), sum_window_0 (type: double), min_window_1 (type: double), max_window_2 (type: double), avg_window_3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 3, 4, 5, 6, 7] Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 10344 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 6a132b8..cf6af00 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -68,16 +68,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + 
partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -112,13 +124,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -938,7 +971,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1139,7 +1172,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1443,7 +1476,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, 
sum] vectorized: false Reduce Operator Tree: Select Operator @@ -1786,16 +1819,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1830,13 +1875,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1962,16 +2028,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, 
VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2006,13 +2084,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5422,7 +5521,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reduce Operator Tree: Select Operator @@ -8349,16 +8448,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -8385,15 +8496,38 @@ STAGE PLANS: name: min window function: GenericUDAFMinEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingDoubleMin] + functionInputExpressions: [col 3:double, col 3:double] + functionNames: [sum, min] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 0:string, col 1:string] + outputColumns: [4, 5, 1, 0, 2, 3] + outputTypes: [double, double, string, string, int, double] + streamingColumns: [4, 5] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sum_window_0 (type: double), min_window_1 (type: double), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) outputColumnNames: sum_window_0, min_window_1, _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 5, 1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumnNums: [0, 1] + valueColumnNums: [4, 5, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: double), min_window_1 (type: double), _col5 (type: int), _col7 (type: double) Reducer 3 @@ -8777,16 +8911,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -8807,13 +8953,34 @@ STAGE PLANS: name: sum window function: 
GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 3:double] + functionNames: [sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 1, 0, 2, 3] + outputTypes: [double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), round(sum_window_0, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 2) -> 5:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -9663,16 +9830,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [string, string] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string, string] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int) outputColumnNames: _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -9693,13 +9872,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 1:int] + functionNames: [sum] + keyInputColumns: [1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:int] + outputColumns: [2, 1] + outputTypes: [bigint, int] + partitionExpressions: [ConstantVectorExpression(val Manufacturer#6) -> 3:string] + streamingColumns: [2] Statistics: Num rows: 5 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'Manufacturer#6' (type: string), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 2] + selectExpressions: ConstantVectorExpression(val Manufacturer#6) -> 4:string Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false 
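Reviewer note: the hunks above swap the old "UNBOUNDED end frame is not supported for ROWS window type" bail-out for streaming evaluators such as VectorPTFEvaluatorStreamingLongSum and VectorPTFEvaluatorStreamingDoubleSum, which the plans now list under evaluatorClasses and streamingColumns. As a minimal sketch of the streaming style only, here is a standalone class; it is not the Hive source, and the class name, constructor, and simplified batch handling (no selectedInUse or null-vector logic) are all assumptions of this sketch:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class StreamingLongSumSketch {

  private final int inputColumnNum;
  private final int outputColumnNum;
  private long runningSum;

  public StreamingLongSumSketch(int inputColumnNum, int outputColumnNum) {
    this.inputColumnNum = inputColumnNum;
    this.outputColumnNum = outputColumnNum;
  }

  // Streaming style: write the running sum into the output column as each
  // batch arrives, so nothing has to be buffered until group end.
  public void evaluateGroupBatch(VectorizedRowBatch batch) {
    LongColumnVector in = (LongColumnVector) batch.cols[inputColumnNum];
    LongColumnVector out = (LongColumnVector) batch.cols[outputColumnNum];
    for (int i = 0; i < batch.size; i++) {
      runningSum += in.vector[i];
      out.vector[i] = runningSum;
    }
  }

  // Each row's result is already final when the row is seen.
  public boolean streamsResult() {
    return true;
  }

  // Nothing left to do at group end: every output row was already written.
  public void doLastBatchWork() {
  }

  // Called between groups to start the next running sum from zero.
  public void resetEvaluator() {
    runningSum = 0;
  }
}

The property that makes this legal is visible in the plans' window frame, ROWS PRECEDING(MAX)~CURRENT (UNBOUNDED PRECEDING to CURRENT ROW): each row's running aggregate is final the moment the row is seen, so results can be emitted batch by batch instead of after the whole group.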
Statistics: Num rows: 5 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out index 2bb7730..5ea866b 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -307,16 +307,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:double, VALUE._col4:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double) outputColumnNames: _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -344,13 +356,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:double, col 1:double] + functionNames: [rank, sum] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:double] + outputColumns: [3, 4, 0, 2, 1] + outputTypes: [int, double, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4] Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + selectExpressions: DoubleColSubtractDoubleScalar(col 4:double, val 5.0) -> 5:double Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1538,16 +1571,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: avg UNBOUNDED end frame is not 
supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double) outputColumnNames: _col2, _col4, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1568,13 +1613,33 @@ STAGE PLANS: name: avg window function: GenericUDAFAverageEvaluatorDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleAvg] + functionInputExpressions: [col 2:double] + functionNames: [avg] + keyInputColumns: [0, 1] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string, col 0:string] + outputColumns: [3, 0, 1, 2] + outputTypes: [double, string, string, double] + partitionExpressions: [col 0:string] + streamingColumns: [3] Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), avg_window_0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out index 91b52e7..7b6fa66 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -113,16 +113,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aza + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, KEY.reducesinkkey2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col2, _col3, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -143,16 
+155,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 2:bigint] + functionNames: [sum] + keyInputColumns: [0, 2, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:bigint] + outputColumns: [3, 0, 2, 1] + outputTypes: [bigint, int, bigint, string] + partitionExpressions: [col 0:int] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), _col7 (type: string), _col3 (type: bigint), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -245,16 +280,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: ++- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:double, KEY.reducesinkkey1:string, KEY.reducesinkkey2:float + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) outputColumnNames: _col4, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 0, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -275,16 +322,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 2:float] + functionNames: [sum] + keyInputColumns: [2, 0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:float] + outputColumns: [3, 2, 0, 1] + outputTypes: [double, float, double, string] + partitionExpressions: [col 0:double] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: double), _col7 (type: string), _col4 (type: float), sum_window_0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 
196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out index 93b8655..584453c 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out @@ -105,16 +105,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, KEY.reducesinkkey2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col2, _col3, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -135,16 +147,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 2:bigint] + functionNames: [sum] + keyInputColumns: [0, 2, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:bigint] + outputColumns: [3, 0, 2, 1] + outputTypes: [bigint, int, bigint, string] + partitionExpressions: [col 0:int] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -327,16 +362,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: 
enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:double, KEY.reducesinkkey1:string, KEY.reducesinkkey2:float + partitionColumnCount: 0 + scratchColumnTypeNames: [double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: float), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: string) outputColumnNames: _col4, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 0, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -357,16 +404,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 2:float] + functionNames: [sum] + keyInputColumns: [2, 0, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:float] + outputColumns: [3, 2, 0, 1] + outputTypes: [double, float, double, string] + partitionExpressions: [col 0:double] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), sum_window_0 (type: double) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index cc6631e..b6b6cc2 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -212,16 +212,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] 
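Reviewer note: these Reducer plans mix both evaluator styles (rank, dense_rank, and the running sum all appear in streamingColumns here, while functions like count must see the whole group before producing a value). Below is a hypothetical driver loop showing how a caller could consume the two styles; the method names follow the evaluator contract this patch reshapes, but the Evaluator interface, the list-of-batches shape, and the body are illustrative, not Hive's VectorPTFOperator:

import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class GroupDriverSketch {

  // Illustrative stand-in for the evaluator contract named in these plans.
  interface Evaluator {
    void evaluateGroupBatch(VectorizedRowBatch batch) throws Exception;
    void doLastBatchWork();
    boolean streamsResult();
    boolean isGroupResultNull();
  }

  static void processGroup(List<VectorizedRowBatch> groupBatches, Evaluator ev)
      throws Exception {
    for (VectorizedRowBatch batch : groupBatches) {
      // Streaming evaluators fill their output column here, batch by batch.
      ev.evaluateGroupBatch(batch);
    }
    // Group-end hook: runs for both styles once the last batch is in.
    ev.doLastBatchWork();
    if (!ev.streamsResult()) {
      // Non-streaming: a single per-group value exists only now.
      if (ev.isGroupResultNull()) {
        // ... write NULL into the output column for every row of the group ...
      } else {
        // ... broadcast the group result across the group's rows ...
      }
    }
  }
}

Note how the group-end finalization is an unconditional call rather than a flag threaded through every batch, which is why the golden plans can treat streaming and non-streaming functions uniformly in one operator.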
Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -256,13 +268,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -846,16 +879,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -890,13 +935,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + 
evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2141,16 +2207,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2185,13 +2263,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: 
string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2356,16 +2455,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2400,13 +2511,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false 
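Reviewer note: for contrast with the streaming sketch earlier, here is a sketch of the non-streaming style, where doLastBatchWork finalizes a single per-group value (an average over doubles in this sketch). This is not the Hive evaluator; the class name, the double-based accumulation, and the simplified null handling are assumptions:

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class GroupAvgSketch {

  private final int inputColumnNum;
  private double sum;
  private long rowCount;
  private double groupAvg;

  public GroupAvgSketch(int inputColumnNum) {
    this.inputColumnNum = inputColumnNum;
  }

  // Non-streaming style: only accumulate while batches arrive; no output can
  // be produced yet, because the average depends on rows not seen so far.
  public void evaluateGroupBatch(VectorizedRowBatch batch) {
    DoubleColumnVector in = (DoubleColumnVector) batch.cols[inputColumnNum];
    for (int i = 0; i < batch.size; i++) {
      sum += in.vector[i];
      rowCount++;
    }
  }

  // The single per-group result is finalized exactly once, after the last batch.
  public void doLastBatchWork() {
    if (rowCount > 0) {
      groupAvg = sum / rowCount;
    }
  }

  // The whole group must be seen before a result exists.
  public boolean streamsResult() {
    return false;
  }

  // An empty group has no average.
  public boolean isGroupResultNull() {
    return rowCount == 0;
  }

  public double getGroupResult() {
    return groupAvg;
  }
}

In this style streamsResult() returns false, so a caller only reads the group result (or its null flag) after doLastBatchWork has run, matching the driver sketch above.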
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2637,16 +2769,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) Reducer 4 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, double, double] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2681,13 +2825,34 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum] + functionInputExpressions: [col 1:string, col 1:string, col 3:double] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 1:string] + outputColumns: [4, 5, 6, 1, 0, 2, 3] + outputTypes: [int, int, double, string, string, int, double] + partitionExpressions: [col 0:string] + streamingColumns: [4, 5, 6] Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 7] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3152,7 +3317,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce 
Operator Tree: Select Operator @@ -3889,7 +4054,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4784,16 +4949,28 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -4828,13 +5005,33 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 1:string, col 1:string, col 2:int] + functionNames: [rank, dense_rank, sum] + keyInputColumns: [1, 0] + native: true + nonKeyInputColumns: [2] + orderExpressions: [col 1:string] + outputColumns: [3, 4, 5, 1, 0, 2] + outputTypes: [int, int, bigint, string, string, int] + partitionExpressions: [col 0:string] + streamingColumns: [3, 4, 5] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 2, 5] Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/perf/spark/query51.q.out ql/src/test/results/clientpositive/perf/spark/query51.q.out index c0bb72b..21afbe2 100644 --- ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -284,6 +284,7 @@ STAGE PLANS: Statistics: Num rows: 
348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(27,2)), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(27,2)) Reducer 4 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: decimal(27,2)) diff --git ql/src/test/results/clientpositive/perf/tez/query51.q.out ql/src/test/results/clientpositive/perf/tez/query51.q.out index be123ae..ba3f994 100644 --- ql/src/test/results/clientpositive/perf/tez/query51.q.out +++ ql/src/test/results/clientpositive/perf/tez/query51.q.out @@ -106,20 +106,20 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_113] - Limit [LIM_112] (rows=100 width=88) + File Output Operator [FS_117] + Limit [LIM_116] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_111] (rows=116159124 width=88) + Select Operator [SEL_115] (rows=116159124 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_50] - Select Operator [SEL_46] (rows=116159124 width=88) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=116159124 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_58] (rows=116159124 width=88) + Filter Operator [FIL_112] (rows=116159124 width=88) predicate:(max_window_0 > max_window_1) - PTF Operator [PTF_45] (rows=348477374 width=88) + PTF Operator [PTF_111] (rows=348477374 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END ASC NULLS FIRST","partition by:":"CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END"}] - Select Operator [SEL_44] (rows=348477374 width=88) + Select Operator [SEL_110] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_43] diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out index 2017923..62d0942 100644 --- ql/src/test/results/clientpositive/spark/ptf.q.out +++ ql/src/test/results/clientpositive/spark/ptf.q.out @@ -69,6 +69,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -547,6 +548,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1572,6 +1574,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -1744,6 +1747,7 @@ 
STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 3
+            Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -1976,6 +1980,7 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 4
+            Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -2916,6 +2921,7 @@ STAGE PLANS:
                     value expressions: p_size (type: int), p_retailprice (type: double)
             Execution mode: vectorized
         Reducer 3
+            Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3672,6 +3678,7 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int)
         Reducer 5
+            Execution mode: vectorized
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index f82e248..a133aad 100644
--- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -209,15 +209,28 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 3
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, double, double]
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
                 outputColumnNames: _col1, _col2, _col5, _col7
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2, 3]
                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -252,13 +265,34 @@ STAGE PLANS:
                         name: sum
                         window function: GenericUDAFSumDouble
                         window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 3:double]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [4, 5, 6, 1, 0, 2, 3]
+                      outputTypes: [int, int, double, string, string, int, double]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [4, 5, 6]
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
+                        selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -835,15 +869,28 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 3
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, double, double]
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
                 outputColumnNames: _col1, _col2, _col5, _col7
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2, 3]
                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -878,13 +925,34 @@ STAGE PLANS:
                         name: sum
                         window function: GenericUDAFSumDouble
                         window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 3:double]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [4, 5, 6, 1, 0, 2, 3]
+                      outputTypes: [int, int, double, string, string, int, double]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [4, 5, 6]
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
+                        selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2112,15 +2180,28 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 3
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, double, double]
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
                 outputColumnNames: _col1, _col2, _col5, _col7
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2, 3]
                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -2155,13 +2236,34 @@ STAGE PLANS:
                         name: sum
                         window function: GenericUDAFSumDouble
                         window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 3:double]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [4, 5, 6, 1, 0, 2, 3]
+                      outputTypes: [int, int, double, string, string, int, double]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [4, 5, 6]
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
+                        selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2323,15 +2425,28 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 3
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, double, double]
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
                 outputColumnNames: _col1, _col2, _col5, _col7
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2, 3]
                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -2366,13 +2481,34 @@ STAGE PLANS:
                         name: sum
                         window function: GenericUDAFSumDouble
                         window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 3:double]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [4, 5, 6, 1, 0, 2, 3]
+                      outputTypes: [int, int, double, string, string, int, double]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [4, 5, 6]
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
+                        selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2599,15 +2735,28 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int), _col7 (type: double)
         Reducer 4
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, double, double]
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
                 outputColumnNames: _col1, _col2, _col5, _col7
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2, 3]
                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -2642,13 +2791,34 @@ STAGE PLANS:
                         name: sum
                         window function: GenericUDAFSumDouble
                         window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 3:double]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [4, 5, 6, 1, 0, 2, 3]
+                      outputTypes: [int, int, double, string, string, int, double]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [4, 5, 6]
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
+                        selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3108,7 +3278,7 @@ STAGE PLANS:
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
+                notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
                 vectorized: false
             Reduce Operator Tree:
               Select Operator
@@ -3792,15 +3962,28 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reducer 3
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int, VALUE._col5:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, double, double]
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
                 outputColumnNames: _col1, _col2, _col5, _col7
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2, 3]
                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -3835,13 +4018,34 @@ STAGE PLANS:
                         name: sum
                         window function: GenericUDAFSumDouble
                         window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingDoubleSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 3:double]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2, 3]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [4, 5, 6, 1, 0, 2, 3]
+                      outputTypes: [int, int, double, string, string, int, double]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [4, 5, 6]
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2, 4, 5, 7]
+                        selectExpressions: RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 2) -> 7:double
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
@@ -4659,15 +4863,28 @@ STAGE PLANS:
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: int)
         Reducer 5
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
-                vectorized: false
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, bigint]
             Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
                outputColumnNames: _col1, _col2, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0, 2]
                Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                PTF Operator
                  Function definitions:
@@ -4702,13 +4919,33 @@ STAGE PLANS:
                        name: sum
                        window function: GenericUDAFSumLong
                        window frame: ROWS PRECEDING(MAX)~CURRENT
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+                      functionInputExpressions: [col 1:string, col 1:string, col 2:int]
+                      functionNames: [rank, dense_rank, sum]
+                      keyInputColumns: [1, 0]
+                      native: true
+                      nonKeyInputColumns: [2]
+                      orderExpressions: [col 1:string]
+                      outputColumns: [3, 4, 5, 1, 0, 2]
+                      outputTypes: [int, int, bigint, string, string, int]
+                      partitionExpressions: [col 0:string]
+                      streamingColumns: [3, 4, 5]
                  Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
                    Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                      Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
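For reference, the golden-file changes above correspond to a window query of roughly the following shape, reconstructed from the plan itself (partition on col 0, order on col 1, and a running sum whose frame the plan prints as ROWS PRECEDING(MAX)~CURRENT). The part-table column names and aliases are assumptions inferred from the column types in the plan, not copied from the .q test file:

    -- Hypothetical sketch of the query shape exercised by vectorized_ptf.q;
    -- column names (p_mfgr, p_name, p_size, p_retailprice) and aliases are assumed.
    explain vectorization detail
    select p_mfgr, p_name, p_size,
           rank() over (partition by p_mfgr order by p_name) as r,
           dense_rank() over (partition by p_mfgr order by p_name) as dr,
           round(sum(p_retailprice) over (partition by p_mfgr order by p_name
                 rows between unbounded preceding and current row), 2) as s
    from part;

Before this patch such plans fell back to row mode with "notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type"; with the streaming evaluators (VectorPTFEvaluatorStreamingDoubleSum, VectorPTFEvaluatorStreamingLongSum) the reducers now run vectorized, while lag still falls back, as the -3108,7 hunk shows.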