diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f60091bf9f..35aae6a6a0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.RelOptCluster; @@ -110,6 +111,7 @@ import java.util.List; import java.util.Map; + public class RexNodeConverter { private static class InputCtx { @@ -354,6 +356,12 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); calciteOp = SqlStdOperatorTable.OR; } + } else if (calciteOp.getKind() == SqlKind.COALESCE && + childRexNodeLst.size() > 1 ) { + // Rewrite COALESCE as a CASE + // This allows to be further reduced to OR, if possible + calciteOp = SqlStdOperatorTable.CASE; + childRexNodeLst = rewriteCoalesceChildren(func, childRexNodeLst); } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { childRexNodeLst = rewriteToDateChildren(childRexNodeLst); } @@ -537,7 +545,6 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } - private List rewriteToDateChildren(List childRexNodeLst) { List newChildRexNodeLst = new ArrayList(); assert childRexNodeLst.size() == 1; @@ -566,6 +573,25 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } + private List rewriteCoalesceChildren( + ExprNodeGenericFuncDesc func, List childRexNodeLst) { + final List convertedChildList = Lists.newArrayList(); + assert childRexNodeLst.size() > 0; + final RexBuilder rexBuilder = cluster.getRexBuilder(); + int i=0; + for (; i < childRexNodeLst.size()-1; ++i ) { + // WHEN child not null THEN child + final RexNode child = childRexNodeLst.get(i); + RexNode childCond = rexBuilder.makeCall( + SqlStdOperatorTable.IS_NOT_NULL, child); + convertedChildList.add(childCond); + convertedChildList.add(child); + } + // Add the last child as the ELSE element + convertedChildList.add(childRexNodeLst.get(i)); + return convertedChildList; + } + private static boolean checkForStatefulFunctions(List list) { for (ExprNodeDesc node : list) { if (node instanceof ExprNodeGenericFuncDesc) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 06c9617818..83e1b42651 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -240,6 +240,7 @@ public static ASTNode buildAST(SqlOperator op, List children) { case IS_NOT_NULL: case IS_NULL: case CASE: + case COALESCE: case EXTRACT: case FLOOR: case CEIL: @@ -374,6 +375,7 @@ private static String getName(GenericUDF hiveUDF) { registerFunction("is not distinct from", SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, hToken(HiveParser.EQUAL_NS, "<=>")); registerFunction("when", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); + registerFunction("coalesce", SqlStdOperatorTable.COALESCE, hToken(HiveParser.Identifier, "coalesce")); // timebased registerFunction("year", HiveExtractDate.YEAR, hToken(HiveParser.Identifier, "year")); diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 695a269c70..d2c9ff9dd0 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -210,7 +210,7 @@ STAGE PLANS: outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), COALESCE(_col3,0) (type: decimal(13,3)), COALESCE(_col4,0) (type: decimal(13,3)) + expressions: _col0 (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: decimal(13,3)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(13,3)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/groupby13.q.out b/ql/src/test/results/clientpositive/groupby13.q.out index 0747cfabbf..e0359e9f7b 100644 --- a/ql/src/test/results/clientpositive/groupby13.q.out +++ b/ql/src/test/results/clientpositive/groupby13.q.out @@ -44,7 +44,7 @@ STAGE PLANS: alias: grpby_test Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: int_col_7 (type: int), int_col_7 (type: int), least(COALESCE(int_col_5,-279),COALESCE(int_col_7,476)) (type: int) + expressions: int_col_7 (type: int), int_col_7 (type: int), least(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (-279) END,CASE WHEN (int_col_7 is not null) THEN (int_col_7) ELSE (476) END) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/innerjoin1.q.out b/ql/src/test/results/clientpositive/innerjoin1.q.out index d17ed26c8d..aa530a99a9 100644 --- a/ql/src/test/results/clientpositive/innerjoin1.q.out +++ b/ql/src/test/results/clientpositive/innerjoin1.q.out @@ -76,50 +76,50 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c1 - filterExpr: COALESCE(a1,a2) is not null (type: boolean) + filterExpr: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(a1,a2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a1 (type: string), a2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: c2 - filterExpr: COALESCE(b1,b2) is not null (type: boolean) + filterExpr: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(b1,b2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b1 (type: string), b2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 COALESCE(_col0,_col1) (type: string) - 1 COALESCE(_col0,_col1) (type: string) + 0 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + 1 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -157,50 +157,50 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c1 - filterExpr: COALESCE(a1,a2) is not null (type: boolean) + filterExpr: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(a1,a2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a1 (type: string), a2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: c2 - filterExpr: COALESCE(b1,b2) is not null (type: boolean) + filterExpr: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(b1,b2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b1 (type: string), b2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 COALESCE(_col0,_col1) (type: string) - 1 COALESCE(_col0,_col1) (type: string) + 0 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + 1 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out index d80bbf17ee..15c36ece63 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out @@ -85,7 +85,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col4,'EMPTY')) (type: boolean) + predicate: (CASE WHEN (_col1 is not null) THEN (_col1) ELSE ('EMPTY') END = CASE WHEN (_col4 is not null) THEN (_col4) ELSE ('EMPTY') END) (type: boolean) Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -161,7 +161,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col4,'EMPTY')) (type: boolean) + predicate: (CASE WHEN (_col1 is not null) THEN (_col1) ELSE ('EMPTY') END = CASE WHEN (_col4 is not null) THEN (_col4) ELSE ('EMPTY') END) (type: boolean) Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -170,7 +170,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out index a65603b2cf..986700ca86 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out @@ -26,32 +26,14 @@ STAGE PLANS: Vertices: Map 1 Map Operator Tree: - TableScan Vectorization: - native: true - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNull(col 5:double) - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6, 2, 4, 1, 17] - selectExpressions: VectorCoalesce(columns [13, 6, 14, 15, 16])(children: ConstantVectorExpression(val null) -> 13:string, col 6:string, CastLongToString(col 2:int) -> 14:string, CastFloatToString(col 4:float) -> 15:string, CastLongToString(col 1:smallint) -> 16:string) -> 17:string - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: SELECT operator: Unable to vectorize CASE WHEN expression -- data type float of THEN/ELSE expression is different than the result type string. Conversion is not supported + vectorized: false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -143,8 +125,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 2, 16] - selectExpressions: VectorCoalesce(columns [13, 15, 14])(children: ConstantVectorExpression(val null) -> 13:double, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 15:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 16:double + projectedOutputColumnNums: [5, 2, 14] + selectExpressions: IfExprDoubleColumnLongScalar(col 19:boolean, col 20:double, val 0)(children: ColAndCol(col 15:boolean, col 18:boolean)(children: IsNotNull(col 5:double) -> 15:boolean, IsNotNull(col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 18:boolean) -> 19:boolean, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 20:double) -> 14:double Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -335,8 +317,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 9, 13] - selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8:timestamp, col 9:timestamp) -> 13:timestamp + projectedOutputColumnNums: [8, 9, 14] + selectExpressions: IfExprTimestampColumnColumn(col 13:boolean, col 8:timestampcol 9:timestamp)(children: IsNotNull(col 8:timestamp) -> 13:boolean) -> 14:timestamp Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -518,8 +500,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 0, 15] - selectExpressions: ConstantVectorExpression(val null) -> 13:bigint, VectorCoalesce(columns [14, 0])(children: ConstantVectorExpression(val null) -> 14:bigint, col 0:tinyint) -> 15:bigint + projectedOutputColumnNums: [13, 0, 0] + selectExpressions: ConstantVectorExpression(val null) -> 13:bigint Limit Vectorization: className: VectorLimitOperator native: true diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index 3776f3e6b3..1808d133d7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -62,18 +62,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 4:string)(children: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string) -> 3:int Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 3:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -103,7 +103,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -200,13 +200,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + selectExpressions: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -227,7 +227,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Stage: Stage-0 @@ -287,18 +287,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 4:string)(children: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string) -> 3:int Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 3:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -328,7 +328,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -425,13 +425,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + selectExpressions: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -452,7 +452,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 1953826911..8763ba1b10 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -113,13 +113,13 @@ STAGE PLANS: 1 Map 2 Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) + expressions: _col0 (type: bigint), CASE WHEN (CASE WHEN (_col2 is not null) THEN ((_col2 > 1L)) ELSE (true) END) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: IfExprColumnNull(col 3:boolean, col 2:bigint, null)(children: LongColGreaterLongScalar(col 4:bigint, val 1)(children: VectorCoalesce(columns [2, 3])(children: col 2:bigint, ConstantVectorExpression(val 5) -> 3:bigint) -> 4:bigint) -> 3:boolean, col 2:bigint) -> 4:bigint + projectedOutputColumnNums: [0, 7] + selectExpressions: IfExprColumnNull(col 6:boolean, col 2:bigint, null)(children: IfExprCondExprColumn(col 3:boolean, col 4:boolean, col 5:boolean)(children: IsNotNull(col 2:bigint) -> 3:boolean, LongColGreaterLongScalar(col 2:bigint, val 1) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:boolean) -> 6:boolean, col 2:bigint) -> 7:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -147,7 +147,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: member:bigint partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, bigint] + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint] Map 2 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out index 01b746d381..00babf1262 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out @@ -57,13 +57,13 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] Select Operator - expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int) + expressions: CASE WHEN (a is not null) THEN (a) ELSE (b) END (type: int), a (type: int), b (type: int) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 0, 1] - selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int + projectedOutputColumnNums: [4, 0, 1] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 0:int, col 1:int)(children: IsNotNull(col 0:int) -> 3:boolean) -> 4:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) @@ -73,7 +73,7 @@ STAGE PLANS: keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 3:int + valueColumns: 4:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -92,7 +92,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:int, b:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git a/ql/src/test/results/clientpositive/perf/spark/query40.q.out b/ql/src/test/results/clientpositive/perf/spark/query40.q.out index 6cdac29e9b..cd49695e4e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query40.q.out @@ -229,7 +229,7 @@ STAGE PLANS: 1 Map 9 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)) + expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < DATE'1998-04-08')) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= DATE'1998-04-08')) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index 87b84bb452..ec2b091842 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -444,7 +444,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col11, _col12 Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2)) + expressions: _col1 (type: int), CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(12,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -599,7 +599,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col11, _col12 Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2)) + expressions: _col1 (type: int), CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(12,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -723,7 +723,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col11, _col12 Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2)) + expressions: _col1 (type: int), CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(12,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query67.q.out b/ql/src/test/results/clientpositive/perf/spark/query67.q.out index 597a468601..3e31b314ee 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query67.q.out @@ -224,7 +224,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col11, _col13, _col14, _col15, _col16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col14 (type: string), _col13 (type: string), _col16 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col11 (type: string), COALESCE((_col4 * CAST( _col3 AS decimal(10,0))),0) (type: decimal(18,2)) + expressions: _col15 (type: string), _col14 (type: string), _col13 (type: string), _col16 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col11 (type: string), CASE WHEN ((_col4 is not null and _col3 is not null)) THEN ((_col4 * CAST( _col3 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index 54c3c695aa..70de097f0b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -528,7 +528,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -597,7 +597,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -650,7 +650,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -757,7 +757,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -778,7 +778,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -831,7 +831,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query77.q.out b/ql/src/test/results/clientpositive/perf/spark/query77.q.out index ea80550259..86bf24903b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query77.q.out @@ -800,7 +800,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4, _col5 Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'web channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), COALESCE(_col4,0) (type: decimal(17,2)), (_col2 - COALESCE(_col5,0)) (type: decimal(18,2)) + expressions: 'web channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(17,2)), (_col2 - CASE WHEN (_col5 is not null) THEN (_col5) ELSE (0) END) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -888,7 +888,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'store channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), COALESCE(_col4,0) (type: decimal(17,2)), (_col2 - COALESCE(_col5,0)) (type: decimal(18,2)) + expressions: 'store channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(17,2)), (_col2 - CASE WHEN (_col5 is not null) THEN (_col5) ELSE (0) END) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out index 15c7f04b30..dae889e2c4 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -129,7 +129,7 @@ STAGE PLANS: Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 177), Map 21 (PARTITION-LEVEL SORT, 177) Reducer 3 <- Reducer 2 (GROUP, 241) Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 167), Reducer 3 (PARTITION-LEVEL SORT, 167) - Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 91), Reducer 4 (PARTITION-LEVEL SORT, 91) + Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 113), Reducer 4 (PARTITION-LEVEL SORT, 113) Reducer 6 <- Reducer 5 (SORT, 1) Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 432), Map 9 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### @@ -457,13 +457,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9 Statistics: Num rows: 191662559 Data size: 16908526668 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col7,0) > 0) (type: boolean) - Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col7 is not null) THEN ((_col7 > 0L)) ELSE (false) END (type: boolean) + Statistics: Num rows: 95831279 Data size: 8454263289 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95831279 Data size: 8454263289 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: bigint), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: @@ -474,18 +474,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col11, _col12, _col13 - Statistics: Num rows: 70276272 Data size: 6199793181 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 105414409 Data size: 9299689819 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col11,0) > 0) (type: boolean) - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col11 is not null) THEN ((_col11 > 0L)) ELSE (false) END (type: boolean) + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), (COALESCE(_col7,0) + COALESCE(_col11,0)) (type: bigint), (COALESCE(_col8,0) + COALESCE(_col12,0)) (type: decimal(18,2)), (COALESCE(_col9,0) + COALESCE(_col13,0)) (type: decimal(18,2)), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), round((UDFToDouble(_col2) / UDFToDouble(COALESCE((_col7 + _col11),1))), 2) (type: double) + expressions: _col0 (type: int), _col1 (type: int), (CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END + CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END) (type: bigint), (CASE WHEN (_col8 is not null) THEN (_col8) ELSE (0) END + CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END) (type: decimal(18,2)), (CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END + CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: decimal(18,2)), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), round((UDFToDouble(_col2) / UDFToDouble(CASE WHEN ((_col7 is not null and _col11 is not null)) THEN ((_col7 + _col11)) ELSE (1) END)), 2) (type: double) outputColumnNames: _col0, _col1, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col9 (type: bigint), _col10 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(18,2)), _col8 (type: decimal(18,2)), _col12 (type: double) sort order: ++---++++ - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 6 Execution mode: vectorized @@ -493,7 +493,7 @@ STAGE PLANS: Select Operator expressions: 2000 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: bigint), KEY.reducesinkkey6 (type: decimal(18,2)), KEY.reducesinkkey7 (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query80.q.out b/ql/src/test/results/clientpositive/perf/spark/query80.q.out index 63cca92556..75cd0059f3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query80.q.out @@ -651,7 +651,7 @@ STAGE PLANS: outputColumnNames: _col5, _col6, _col9, _col10, _col18 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -781,7 +781,7 @@ STAGE PLANS: 1 Map 30 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -855,7 +855,7 @@ STAGE PLANS: 1 Map 11 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/query78.q.out index b1102603c7..f475d9be23 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query78.q.out @@ -142,15 +142,15 @@ Stage-0 File Output Operator [FS_238] Limit [LIM_237] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_236] (rows=23425424 width=88) + Select Operator [SEL_236] (rows=52707204 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_73] - Select Operator [SEL_72] (rows=23425424 width=88) + Select Operator [SEL_72] (rows=52707204 width=88) Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_71] (rows=23425424 width=88) - predicate:(COALESCE(_col11,0) > 0) - Merge Join Operator [MERGEJOIN_191] (rows=70276272 width=88) + Filter Operator [FIL_71] (rows=52707204 width=88) + predicate:CASE WHEN (_col11 is not null) THEN ((_col11 > 0L)) ELSE (false) END + Merge Join Operator [MERGEJOIN_191] (rows=105414409 width=88) Conds:RS_68._col1=RS_235._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] <-Reducer 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_235] @@ -214,8 +214,8 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col1 - Filter Operator [FIL_45] (rows=63887519 width=88) - predicate:(COALESCE(_col7,0) > 0) + Filter Operator [FIL_45] (rows=95831279 width=88) + predicate:CASE WHEN (_col7 is not null) THEN ((_col7 > 0L)) ELSE (false) END Merge Join Operator [MERGEJOIN_190] (rows=191662559 width=88) Conds:RS_215._col1, _col0=RS_225._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/semijoin4.q.out b/ql/src/test/results/clientpositive/semijoin4.q.out index 83db8b85bc..70e9a933dd 100644 --- a/ql/src/test/results/clientpositive/semijoin4.q.out +++ b/ql/src/test/results/clientpositive/semijoin4.q.out @@ -57,11 +57,10 @@ WHERE (t2.tinyint_col_21) IN ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-5 is a root stage - Stage-6 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -75,15 +74,14 @@ STAGE PLANS: predicate: ((tinyint_col_46 = -92Y) and bigint_col_13 is not null and decimal1309_col_65 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9)) - outputColumnNames: _col0, _col1, _col3, _col4 + expressions: bigint_col_13 (type: bigint), decimal1309_col_65 (type: decimal(13,9)) + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: decimal(27,9)), _col0 (type: bigint) + key expressions: _col2 (type: decimal(27,9)), _col0 (type: bigint) sort order: ++ - Map-reduce partition columns: _col4 (type: decimal(27,9)), _col0 (type: bigint) + Map-reduce partition columns: _col2 (type: decimal(27,9)), _col0 (type: bigint) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) TableScan alias: t2 filterExpr: ((tinyint_col_21 = -92Y) and tinyint_col_18 is not null and decimal2709_col_9 is not null) (type: boolean) @@ -105,9 +103,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: decimal(27,9)), _col0 (type: bigint) + 0 _col2 (type: decimal(27,9)), _col0 (type: bigint) 1 _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col1, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -123,7 +120,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) TableScan Reduce Output Operator sort order: @@ -135,65 +131,20 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col3 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: (UDFToShort(UDFToByte(-92)) + _col1) (type: smallint), floor(_col3) (type: bigint) - sort order: +- - Map-reduce partition columns: (UDFToShort(UDFToByte(-92)) + _col1) (type: smallint) - Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: smallint), VALUE._col3 (type: double) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: smallint, _col3: double - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: (UDFToShort(UDFToByte(-92)) + _col1) ASC NULLS LAST, floor(_col3) DESC NULLS LAST - partition by: (UDFToShort(UDFToByte(-92)) + _col1) - raw input shape: - window functions: - window function definition - alias: LEAD_window_0 - arguments: -973 - name: LEAD - window function: GenericUDAFLeadEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true + Select Operator + expressions: 498 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: COALESCE(498,LEAD_window_0,524) (type: int) - outputColumnNames: _col0 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -248,7 +199,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/semijoin5.q.out b/ql/src/test/results/clientpositive/semijoin5.q.out index fc65f85b47..dc24376c63 100644 --- a/ql/src/test/results/clientpositive/semijoin5.q.out +++ b/ql/src/test/results/clientpositive/semijoin5.q.out @@ -48,11 +48,10 @@ WHERE (t2.smallint_col_19) IN (SELECT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-6 + Stage-2 depends on stages: Stage-1, Stage-5 Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-6 is a root stage - Stage-0 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -146,7 +145,7 @@ STAGE PLANS: TableScan Reduce Output Operator key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint) - sort order: +- + sort order: -- Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int) @@ -165,66 +164,19 @@ STAGE PLANS: Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: (_col7 + UDFToInteger(_col5)) ASC NULLS LAST, floor(_col3) DESC NULLS LAST + order by: (_col7 + UDFToInteger(_col5)) DESC NULLS LAST, floor(_col3) DESC NULLS LAST partition by: (_col7 + UDFToInteger(_col5)) raw input shape: window functions: window function definition - alias: LEAD_window_0 - arguments: -973 - name: LEAD - window function: GenericUDAFLeadEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int) - outputColumnNames: LEAD_window_0, _col3, _col5, _col7 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint) - sort order: -- - Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), VALUE._col4 (type: double), VALUE._col6 (type: smallint), VALUE._col8 (type: int) - outputColumnNames: _col0, _col4, _col6, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: int, _col4: double, _col6: smallint, _col8: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: (_col8 + UDFToInteger(_col6)) DESC NULLS LAST, floor(_col4) DESC NULLS LAST - partition by: (_col8 + UDFToInteger(_col6)) - raw input shape: - window functions: - window function definition - alias: sum_window_1 + alias: sum_window_0 arguments: 62 name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~FOLLOWING(48) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: COALESCE(498,_col0,524) (type: int), (_col8 + UDFToInteger(_col6)) (type: int), floor(_col4) (type: bigint), COALESCE(sum_window_1,704) (type: bigint) + expressions: 498 (type: int), (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint), CASE WHEN (sum_window_0 is not null) THEN (sum_window_0) ELSE (704) END (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -235,7 +187,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/vector_coalesce.q.out b/ql/src/test/results/clientpositive/vector_coalesce.q.out index 891839bc3a..2a8c0773bb 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce.q.out @@ -22,32 +22,12 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan Vectorization: - native: true - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNull(col 5:double) - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [6, 2, 4, 1, 17] - selectExpressions: VectorCoalesce(columns [13, 6, 14, 15, 16])(children: ConstantVectorExpression(val null) -> 13:string, col 6:string, CastLongToString(col 2:int) -> 14:string, CastFloatToString(col 4:float) -> 15:string, CastLongToString(col 1:smallint) -> 16:string) -> 17:string - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: SELECT operator: Unable to vectorize CASE WHEN expression -- data type float of THEN/ELSE expression is different than the result type string. Conversion is not supported + vectorized: false Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -116,8 +96,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 2, 16] - selectExpressions: VectorCoalesce(columns [13, 15, 14])(children: ConstantVectorExpression(val null) -> 13:double, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 15:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 16:double + projectedOutputColumnNums: [5, 2, 14] + selectExpressions: IfExprDoubleColumnLongScalar(col 19:boolean, col 20:double, val 0)(children: ColAndCol(col 15:boolean, col 18:boolean)(children: IsNotNull(col 5:double) -> 15:boolean, IsNotNull(col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 18:boolean) -> 19:boolean, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 20:double) -> 14:double Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false @@ -282,8 +262,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 9, 13] - selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8:timestamp, col 9:timestamp) -> 13:timestamp + projectedOutputColumnNums: [8, 9, 14] + selectExpressions: IfExprTimestampColumnColumn(col 13:boolean, col 8:timestampcol 9:timestamp)(children: IsNotNull(col 8:timestamp) -> 13:boolean) -> 14:timestamp Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false @@ -446,8 +426,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 0, 15] - selectExpressions: ConstantVectorExpression(val null) -> 13:bigint, VectorCoalesce(columns [14, 0])(children: ConstantVectorExpression(val null) -> 14:bigint, col 0:tinyint) -> 15:bigint + projectedOutputColumnNums: [13, 0, 0] + selectExpressions: ConstantVectorExpression(val null) -> 13:bigint Limit Vectorization: className: VectorLimitOperator native: true diff --git a/ql/src/test/results/clientpositive/vector_coalesce_2.q.out b/ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 918ac59c99..4f63e883a9 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -56,18 +56,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 4:string)(children: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string) -> 3:int Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 3:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -97,7 +97,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reduce Vectorization: enabled: false @@ -171,13 +171,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + selectExpressions: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -197,7 +197,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Stage: Stage-0 @@ -251,18 +251,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 4:string)(children: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string) -> 3:int Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 3:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -292,7 +292,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reduce Vectorization: enabled: false @@ -366,13 +366,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + selectExpressions: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -392,7 +392,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/vector_coalesce_3.q.out index 884078d734..8a58c62816 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce_3.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce_3.q.out @@ -123,13 +123,13 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) + expressions: _col0 (type: bigint), CASE WHEN (CASE WHEN (_col2 is not null) THEN ((_col2 > 1L)) ELSE (true) END) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3] - selectExpressions: IfExprColumnNull(col 2:boolean, col 1:bigint, null)(children: LongColGreaterLongScalar(col 3:bigint, val 1)(children: VectorCoalesce(columns [1, 2])(children: col 1:bigint, ConstantVectorExpression(val 5) -> 2:bigint) -> 3:bigint) -> 2:boolean, col 1:bigint) -> 3:bigint + projectedOutputColumnNums: [0, 6] + selectExpressions: IfExprColumnNull(col 5:boolean, col 1:bigint, null)(children: IfExprCondExprColumn(col 2:boolean, col 3:boolean, col 4:boolean)(children: IsNotNull(col 1:bigint) -> 2:boolean, LongColGreaterLongScalar(col 1:bigint, val 1) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:boolean) -> 5:boolean, col 1:bigint) -> 6:bigint Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out index 8cb5ffa545..649fb8a5f2 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce_4.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out @@ -51,13 +51,13 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] Select Operator - expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int) + expressions: CASE WHEN (a is not null) THEN (a) ELSE (b) END (type: int), a (type: int), b (type: int) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 0, 1] - selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int + projectedOutputColumnNums: [4, 0, 1] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 0:int, col 1:int)(children: IsNotNull(col 0:int) -> 3:boolean) -> 4:int Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) @@ -84,7 +84,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:int, b:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [bigint, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true