diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java index 437c319..daefdc4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -65,6 +65,13 @@ public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, VectorExpression in this.outputColumnNum = outputColumnNum; } + public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, int outputColumnNum) { + this.windowFrameDef = windowFrameDef; + inputVecExpr = null; + inputColumnNum = -1; + this.outputColumnNum = outputColumnNum; + } + // Evaluate the aggregation input argument expression. public void evaluateInputExpr(VectorizedRowBatch batch) throws HiveException { if (inputVecExpr != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java index cb6b586..c80b077 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; @@ -35,9 +34,8 @@ private int denseRank; - public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, - int outputColumnNum) { - super(windowFrameDef, inputVecExpr, outputColumnNum); + public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, int outputColumnNum) { + super(windowFrameDef, outputColumnNum); resetEvaluator(); } @@ -45,7 +43,7 @@ public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpressi public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException { - evaluateInputExpr(batch); + // We don't evaluate input columns... LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java index d20c60c..5fd2506 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; @@ -36,9 +35,8 @@ private int rank; private int groupCount; - public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, - int outputColumnNum) { - super(windowFrameDef, inputVecExpr, outputColumnNum); + public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, int outputColumnNum) { + super(windowFrameDef, outputColumnNum); resetEvaluator(); } @@ -46,7 +44,7 @@ public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression in public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException { - evaluateInputExpr(batch); + // We don't evaluate input columns... /* * Do careful maintenance of the outputColVector.noNulls flag. diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 1956125..48974f8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -2863,45 +2863,50 @@ private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContex default: throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType()); } - if (exprNodeDescList != null && exprNodeDescList.size() > 1) { - setOperatorIssue("More than 1 argument expression of aggregation function " + functionName); - return false; - } - if (exprNodeDescList != null) { - ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0); - if (containsLeadLag(exprNodeDesc)) { - setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName); - return false; - } + // RANK/DENSE_RANK don't care about columns. + if (supportedFunctionType != SupportedFunctionType.RANK && + supportedFunctionType != SupportedFunctionType.DENSE_RANK) { - if (supportedFunctionType != SupportedFunctionType.COUNT && - supportedFunctionType != SupportedFunctionType.DENSE_RANK && - supportedFunctionType != SupportedFunctionType.RANK) { + if (exprNodeDescList != null) { + if (exprNodeDescList.size() > 1) { + setOperatorIssue("More than 1 argument expression of aggregation function " + functionName); + return false; + } - // COUNT, DENSE_RANK, and RANK do not care about column types. The rest do. - TypeInfo typeInfo = exprNodeDesc.getTypeInfo(); - Category category = typeInfo.getCategory(); - boolean isSupportedType; - if (category != Category.PRIMITIVE) { - isSupportedType = false; - } else { - ColumnVector.Type colVecType = - VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - switch (colVecType) { - case LONG: - case DOUBLE: - case DECIMAL: - isSupportedType = true; - break; - default: + ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0); + + if (containsLeadLag(exprNodeDesc)) { + setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName); + return false; + } + + if (supportedFunctionType != SupportedFunctionType.COUNT) { + + // COUNT does not care about column types. The rest do. + TypeInfo typeInfo = exprNodeDesc.getTypeInfo(); + Category category = typeInfo.getCategory(); + boolean isSupportedType; + if (category != Category.PRIMITIVE) { isSupportedType = false; - break; + } else { + ColumnVector.Type colVecType = + VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + switch (colVecType) { + case LONG: + case DOUBLE: + case DECIMAL: + isSupportedType = true; + break; + default: + isSupportedType = false; + break; + } + } + if (!isSupportedType) { + setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName); + return false; } - } - if (!isSupportedType) { - setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName); - return false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java index 53886fe..54efca8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java @@ -162,12 +162,10 @@ public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType function new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum); break; case RANK: - evaluator = - new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = new VectorPTFEvaluatorRank(windowFrameDef, outputColumnNum); break; case DENSE_RANK: - evaluator = - new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum); + evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, outputColumnNum); break; case MIN: switch (columnVectorType) { diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index 3fa2655..3f4bab4 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -1440,7 +1440,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 9f49f2e..44bfe20 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -5765,16 +5765,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [bigint, timestamp, timestamp] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, timestamp, timestamp] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -5796,13 +5808,32 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 0:string] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 5:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 6:timestamp) -> 7:timestamp] + outputColumns: [4, 0, 2, 3] + outputTypes: [int, string, string, double] + streamingColumns: [4] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4] Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -6868,16 +6899,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [bigint, timestamp, timestamp] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank - vectorized: false + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, VALUE._col0:string, VALUE._col1:double + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, timestamp, timestamp] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -6899,13 +6942,32 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 0:string] + functionNames: [rank] + keyInputColumns: [0] + native: true + nonKeyInputColumns: [2, 3] + orderExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 5:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 6:timestamp) -> 7:timestamp] + outputColumns: [4, 0, 2, 3] + outputTypes: [int, string, string, double] + streamingColumns: [4] Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 3, 4] Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index cf6af00..53327bd 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -1660,16 +1660,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank - vectorized: false + reduceColumnNullOrder: aaz + reduceColumnSortOrder: ++- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -1691,13 +1703,33 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1:string] + functionNames: [rank] + keyInputColumns: [1, 0, 2] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:int] + outputColumns: [3, 1, 0, 2] + outputTypes: [int, string, string, int] + partitionExpressions: [col 0:string] + streamingColumns: [3] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out index 0be304c..d2670af 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out @@ -364,16 +364,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function dense_rank - vectorized: false + reduceColumnNullOrder: aaz + reduceColumnSortOrder: ++- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:int, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col2, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2, 0] Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -395,16 +407,39 @@ STAGE PLANS: window function: GenericUDAFDenseRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorDenseRank] + functionInputExpressions: [col 1:int] + functionNames: [dense_rank] + keyInputColumns: [1, 2, 0] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:int, col 2:string] + outputColumns: [3, 1, 2, 0] + outputTypes: [int, int, string, timestamp] + partitionExpressions: [col 0:timestamp] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: string), dense_rank_window_0 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index b6b6cc2..82af6b0 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -2011,16 +2011,28 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank - vectorized: false + reduceColumnNullOrder: aaz + reduceColumnSortOrder: ++- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: @@ -2042,13 +2054,33 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1:string] + functionNames: [rank] + keyInputColumns: [1, 0, 2] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:int] + outputColumns: [3, 1, 0, 2] + outputTypes: [int, string, string, int] + partitionExpressions: [col 0:string] + streamingColumns: [3] Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4187,7 +4219,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -4634,7 +4666,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -5638,7 +5670,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -5946,7 +5978,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/perf/spark/query47.q.out ql/src/test/results/clientpositive/perf/spark/query47.q.out index 690b105..a9b5092 100644 --- ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -484,6 +484,7 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2)) Reducer 16 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) @@ -612,6 +613,7 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 24 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) @@ -701,6 +703,7 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 5 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) diff --git ql/src/test/results/clientpositive/perf/spark/query57.q.out ql/src/test/results/clientpositive/perf/spark/query57.q.out index 51e644a..6785ee9 100644 --- ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -478,6 +478,7 @@ STAGE PLANS: Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) Reducer 16 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2)) @@ -618,6 +619,7 @@ STAGE PLANS: Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 24 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) @@ -695,6 +697,7 @@ STAGE PLANS: Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) Reducer 5 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2)) diff --git ql/src/test/results/clientpositive/perf/tez/query47.q.out ql/src/test/results/clientpositive/perf/tez/query47.q.out index d034ea9..bd17808 100644 --- ql/src/test/results/clientpositive/perf/tez/query47.q.out +++ ql/src/test/results/clientpositive/perf/tez/query47.q.out @@ -121,40 +121,40 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_320] - Limit [LIM_319] (rows=100 width=88) + File Output Operator [FS_334] + Limit [LIM_333] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_318] (rows=843315280 width=88) + Select Operator [SEL_332] (rows=843315280 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_108] Select Operator [SEL_107] (rows=843315280 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_279] (rows=843315280 width=88) - Conds:RS_103._col0, _col1, _col2, _col3, (_col5 + 1)=RS_104._col0, _col1, _col2, _col3, _col8(Inner),RS_104._col0, _col1, _col2, _col3, _col8=RS_105._col0, _col1, _col2, _col3, (_col5 - 1)(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_104] + Conds:RS_320._col0, _col1, _col2, _col3, (_col5 + 1)=RS_331._col0, _col1, _col2, _col3, _col8(Inner),RS_331._col0, _col1, _col2, _col3, _col8=RS_315._col0, _col1, _col2, _col3, (_col5 - 1)(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] PartitionCols:_col0, _col1, _col2, _col3, _col8 - Select Operator [SEL_67] (rows=31943759 width=88) + Select Operator [SEL_330] (rows=31943759 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_169] (rows=31943759 width=88) + Filter Operator [FIL_329] (rows=31943759 width=88) predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END - Select Operator [SEL_66] (rows=63887519 width=88) + Select Operator [SEL_328] (rows=63887519 width=88) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_170] (rows=63887519 width=88) + Filter Operator [FIL_327] (rows=63887519 width=88) predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) - PTF Operator [PTF_65] (rows=383325119 width=88) + PTF Operator [PTF_326] (rows=383325119 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col4, _col3, _col5, _col6"}] - Select Operator [SEL_64] (rows=383325119 width=88) + Select Operator [SEL_325] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + SHUFFLE [RS_324] PartitionCols:_col3, _col2, _col4, _col5 - Select Operator [SEL_315] (rows=383325119 width=88) + Select Operator [SEL_323] (rows=383325119 width=88) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_314] (rows=383325119 width=88) + PTF Operator [PTF_322] (rows=383325119 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5, _col0"}] - Select Operator [SEL_313] (rows=383325119 width=88) + Select Operator [SEL_321] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_310] @@ -247,31 +247,31 @@ Stage-0 Select Operator [SEL_299] (rows=1704 width=1910) Output:["_col0"] Please refer to the previous Select Operator [SEL_297] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] PartitionCols:_col0, _col1, _col2, _col3, (_col5 - 1) - Select Operator [SEL_99] (rows=383325119 width=88) + Select Operator [SEL_314] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_175] (rows=383325119 width=88) + Filter Operator [FIL_313] (rows=383325119 width=88) predicate:rank_window_0 is not null - PTF Operator [PTF_98] (rows=383325119 width=88) + PTF Operator [PTF_312] (rows=383325119 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5"}] - Select Operator [SEL_97] (rows=383325119 width=88) + Select Operator [SEL_311] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_308] PartitionCols:_col3, _col2, _col4, _col5 Please refer to the previous Group By Operator [GBY_307] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_103] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0, _col1, _col2, _col3, (_col5 + 1) - Select Operator [SEL_29] (rows=383325119 width=88) + Select Operator [SEL_319] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_164] (rows=383325119 width=88) + Filter Operator [FIL_318] (rows=383325119 width=88) predicate:rank_window_0 is not null - PTF Operator [PTF_28] (rows=383325119 width=88) + PTF Operator [PTF_317] (rows=383325119 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5"}] - Select Operator [SEL_27] (rows=383325119 width=88) + Select Operator [SEL_316] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_309] diff --git ql/src/test/results/clientpositive/perf/tez/query57.q.out ql/src/test/results/clientpositive/perf/tez/query57.q.out index 42cbbdc..1d3c17d 100644 --- ql/src/test/results/clientpositive/perf/tez/query57.q.out +++ ql/src/test/results/clientpositive/perf/tez/query57.q.out @@ -115,40 +115,40 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_320] - Limit [LIM_319] (rows=100 width=135) + File Output Operator [FS_334] + Limit [LIM_333] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_318] (rows=421645952 width=135) + Select Operator [SEL_332] (rows=421645952 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_108] Select Operator [SEL_107] (rows=421645952 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] Merge Join Operator [MERGEJOIN_279] (rows=421645952 width=135) - Conds:RS_103._col0, _col1, _col2, (_col4 + 1)=RS_104._col0, _col1, _col2, _col7(Inner),RS_104._col0, _col1, _col2, _col7=RS_105._col0, _col1, _col2, (_col4 - 1)(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_104] + Conds:RS_320._col0, _col1, _col2, (_col4 + 1)=RS_331._col0, _col1, _col2, _col7(Inner),RS_331._col0, _col1, _col2, _col7=RS_315._col0, _col1, _col2, (_col4 - 1)(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] PartitionCols:_col0, _col1, _col2, _col7 - Select Operator [SEL_67] (rows=15971437 width=135) + Select Operator [SEL_330] (rows=15971437 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_169] (rows=15971437 width=135) + Filter Operator [FIL_329] (rows=15971437 width=135) predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END - Select Operator [SEL_66] (rows=31942874 width=135) + Select Operator [SEL_328] (rows=31942874 width=135) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_170] (rows=31942874 width=135) + Filter Operator [FIL_327] (rows=31942874 width=135) predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) - PTF Operator [PTF_65] (rows=191657247 width=135) + PTF Operator [PTF_326] (rows=191657247 width=135) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col5, _col4, _col3"}] - Select Operator [SEL_64] (rows=191657247 width=135) + Select Operator [SEL_325] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + SHUFFLE [RS_324] PartitionCols:_col4, _col3, _col2 - Select Operator [SEL_315] (rows=191657247 width=135) + Select Operator [SEL_323] (rows=191657247 width=135) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_314] (rows=191657247 width=135) + PTF Operator [PTF_322] (rows=191657247 width=135) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2, _col0"}] - Select Operator [SEL_313] (rows=191657247 width=135) + Select Operator [SEL_321] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_310] @@ -241,31 +241,31 @@ Stage-0 Select Operator [SEL_299] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous Select Operator [SEL_297] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] PartitionCols:_col0, _col1, _col2, (_col4 - 1) - Select Operator [SEL_99] (rows=191657247 width=135) + Select Operator [SEL_314] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_175] (rows=191657247 width=135) + Filter Operator [FIL_313] (rows=191657247 width=135) predicate:rank_window_0 is not null - PTF Operator [PTF_98] (rows=191657247 width=135) + PTF Operator [PTF_312] (rows=191657247 width=135) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2"}] - Select Operator [SEL_97] (rows=191657247 width=135) + Select Operator [SEL_311] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_308] PartitionCols:_col4, _col3, _col2 Please refer to the previous Group By Operator [GBY_307] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_103] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0, _col1, _col2, (_col4 + 1) - Select Operator [SEL_29] (rows=191657247 width=135) + Select Operator [SEL_319] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_164] (rows=191657247 width=135) + Filter Operator [FIL_318] (rows=191657247 width=135) predicate:rank_window_0 is not null - PTF Operator [PTF_28] (rows=191657247 width=135) + PTF Operator [PTF_317] (rows=191657247 width=135) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2"}] - Select Operator [SEL_27] (rows=191657247 width=135) + Select Operator [SEL_316] (rows=191657247 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_309] diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out index 62d0942..65fa997 100644 --- ql/src/test/results/clientpositive/spark/ptf.q.out +++ ql/src/test/results/clientpositive/spark/ptf.q.out @@ -1403,6 +1403,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 3 + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index a133aad..813190e 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -1989,15 +1989,28 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Reducer 3 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank - vectorized: false + reduceColumnNullOrder: aaz + reduceColumnSortOrder: ++- + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -2019,13 +2032,33 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorRank] + functionInputExpressions: [col 1:string] + functionNames: [rank] + keyInputColumns: [1, 0, 2] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:int] + outputColumns: [3, 1, 0, 2] + outputTypes: [int, string, string, int] + partitionExpressions: [col 0:string] + streamingColumns: [3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4097,7 +4130,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum] vectorized: false Reduce Operator Tree: Select Operator @@ -4553,7 +4586,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -5542,7 +5575,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator @@ -5845,7 +5878,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank + notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type vectorized: false Reduce Operator Tree: Select Operator