diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f60091bf9f..35aae6a6a0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.RelOptCluster; @@ -110,6 +111,7 @@ import java.util.List; import java.util.Map; + public class RexNodeConverter { private static class InputCtx { @@ -354,6 +356,12 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); calciteOp = SqlStdOperatorTable.OR; } + } else if (calciteOp.getKind() == SqlKind.COALESCE && + childRexNodeLst.size() > 1) { + // Rewrite COALESCE as a CASE: COALESCE(a,b) becomes CASE WHEN a IS NOT NULL THEN a ELSE b END + // This allows it to be further reduced to OR, if possible + calciteOp = SqlStdOperatorTable.CASE; + childRexNodeLst = rewriteCoalesceChildren(func, childRexNodeLst); } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { childRexNodeLst = rewriteToDateChildren(childRexNodeLst); } @@ -537,7 +545,6 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List<RexNode> c return newChildRexNodeLst; } - private List<RexNode> rewriteToDateChildren(List<RexNode> childRexNodeLst) { List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>(); assert childRexNodeLst.size() == 1; @@ -566,6 +573,25 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List<RexNode> c return newChildRexNodeLst; } + private List<RexNode> rewriteCoalesceChildren( + ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst) { + final List<RexNode> convertedChildList = Lists.newArrayList(); + assert childRexNodeLst.size() > 0; + final RexBuilder rexBuilder = cluster.getRexBuilder(); + int i = 0; + for (; i < childRexNodeLst.size() - 1; ++i) { + // WHEN child IS NOT NULL THEN child + final RexNode child = childRexNodeLst.get(i); + RexNode childCond = rexBuilder.makeCall( + SqlStdOperatorTable.IS_NOT_NULL, child); + convertedChildList.add(childCond); + convertedChildList.add(child); + } + // Add the last child as the ELSE element + convertedChildList.add(childRexNodeLst.get(i)); + return convertedChildList; + } + private static boolean checkForStatefulFunctions(List<ExprNodeDesc> list) { for (ExprNodeDesc node : list) { if (node instanceof ExprNodeGenericFuncDesc) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 06c9617818..83e1b42651 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -240,6 +240,7 @@ public static ASTNode buildAST(SqlOperator op, List<ASTNode> children) { case IS_NOT_NULL: case IS_NULL: case CASE: + case COALESCE: case EXTRACT: case FLOOR: case CEIL: @@ -374,6 +375,7 @@ private static String getName(GenericUDF hiveUDF) { registerFunction("is not distinct from", SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, hToken(HiveParser.EQUAL_NS, "<=>")); registerFunction("when",
SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); + registerFunction("coalesce", SqlStdOperatorTable.COALESCE, hToken(HiveParser.Identifier, "coalesce")); // timebased registerFunction("year", HiveExtractDate.YEAR, hToken(HiveParser.Identifier, "year")); diff --git ql/src/test/results/clientpositive/deleteAnalyze.q.out ql/src/test/results/clientpositive/deleteAnalyze.q.out index 86b4f3971b..e54270473a 100644 --- ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -214,7 +214,7 @@ STAGE PLANS: outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), COALESCE(_col3,0) (type: decimal(13,3)), COALESCE(_col4,0) (type: decimal(13,3)) + expressions: _col0 (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: decimal(13,3)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(13,3)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/groupby13.q.out ql/src/test/results/clientpositive/groupby13.q.out index cb06e0c401..9cbfa38eee 100644 --- ql/src/test/results/clientpositive/groupby13.q.out +++ ql/src/test/results/clientpositive/groupby13.q.out @@ -48,7 +48,7 @@ STAGE PLANS: alias: grpby_test Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: int_col_7 (type: int), int_col_7 (type: int), least(COALESCE(int_col_5,-279),COALESCE(int_col_7,476)) (type: int) + expressions: int_col_7 (type: int), int_col_7 (type: int), least(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (-279) END,CASE WHEN (int_col_7 is not null) THEN (int_col_7) ELSE (476) END) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/innerjoin1.q.out ql/src/test/results/clientpositive/innerjoin1.q.out index e0f3f8f2ee..e751a5c6d1 100644 --- ql/src/test/results/clientpositive/innerjoin1.q.out +++ ql/src/test/results/clientpositive/innerjoin1.q.out @@ -82,50 +82,50 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c1 - filterExpr: COALESCE(a1,a2) is not null (type: boolean) + filterExpr: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(a1,a2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a1 (type: string), a2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: 
COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: c2 - filterExpr: COALESCE(b1,b2) is not null (type: boolean) + filterExpr: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(b1,b2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b1 (type: string), b2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 COALESCE(_col0,_col1) (type: string) - 1 COALESCE(_col0,_col1) (type: string) + 0 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + 1 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,50 +169,50 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c1 - filterExpr: COALESCE(a1,a2) is not null (type: boolean) + filterExpr: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(a1,a2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (a1 is not null) THEN (a1 is not null) ELSE (a2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a1 (type: string), a2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) TableScan alias: c2 - filterExpr: COALESCE(b1,b2) is not null (type: boolean) + filterExpr: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: COALESCE(b1,b2) is not null (type: boolean) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (b1 is not null) THEN (b1 is not null) ELSE (b2 is not null) END (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: b1 (type: string), b2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: COALESCE(_col0,_col1) (type: string) + key expressions: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) sort order: + - Map-reduce partition columns: COALESCE(_col0,_col1) (type: string) - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 COALESCE(_col0,_col1) (type: string) - 1 COALESCE(_col0,_col1) (type: string) + 0 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) + 1 CASE WHEN (_col0 is not null) THEN (_col0) ELSE (_col1) END (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out index 9492ca90c1..12ca6a47df 100644 --- ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out +++ ql/src/test/results/clientpositive/join_cond_pushdown_unqual5.q.out @@ -91,7 +91,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col4,'EMPTY')) (type: boolean) + predicate: (CASE WHEN (_col1 is 
not null) THEN (_col1) ELSE ('EMPTY') END = CASE WHEN (_col4 is not null) THEN (_col4) ELSE ('EMPTY') END) (type: boolean) Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -173,7 +173,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col1,'EMPTY') = COALESCE(_col4,'EMPTY')) (type: boolean) + predicate: (CASE WHEN (_col1 is not null) THEN (_col1) ELSE ('EMPTY') END = CASE WHEN (_col4 is not null) THEN (_col4) ELSE ('EMPTY') END) (type: boolean) Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/vector_coalesce.q.out ql/src/test/results/clientpositive/llap/vector_coalesce.q.out index 2cdaaf8621..fa1ddcd73e 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce.q.out @@ -39,8 +39,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [6, 2, 4, 1, 17] - selectExpressions: VectorCoalesce(columns [13, 6, 14, 15, 16])(children: ConstantVectorExpression(val null) -> 13:string, col 6:string, CastLongToString(col 2:int) -> 14:string, CastFloatToString(col 4:float) -> 15:string, CastLongToString(col 1:smallint) -> 16:string) -> 17:string + projectedOutputColumnNums: [6, 2, 4, 1, 21] + selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 6:stringcol 20:string)(children: IsNotNull(col 6:string) -> 13:boolean, col 6:string, IfExprCondExprCondExpr(col 14:boolean, col 15:stringcol 19:string)(children: IsNotNull(col 2:int) -> 14:boolean, CastLongToString(col 2:int) -> 15:string, IfExprCondExprCondExpr(col 16:boolean, col 17:stringcol 18:string)(children: IsNotNull(col 4:float) -> 16:boolean, CastFloatToString(col 4:float) -> 17:string, CastLongToString(col 1:smallint) -> 18:string) -> 19:string) -> 20:string) -> 21:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -151,8 +151,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 2, 16] - selectExpressions: VectorCoalesce(columns [13, 15, 14])(children: ConstantVectorExpression(val null) -> 13:double, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 15:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 16:double + projectedOutputColumnNums: [5, 2, 18] + selectExpressions: IfExprCondExprCondExpr(col 16:boolean, col 17:doublecol 14:double)(children: ColAndCol(col 13:boolean, col 15:boolean)(children: IsNotNull(col 5:double) -> 13:boolean, IsNotNull(col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 15:boolean) -> 16:boolean, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 17:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 18:double Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -351,8 +351,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 9, 13] - selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8:timestamp, col 9:timestamp) -> 13:timestamp + projectedOutputColumnNums: [8, 9, 14] + 
selectExpressions: IfExprTimestampColumnColumn(col 13:boolean, col 8:timestampcol 9:timestamp)(children: IsNotNull(col 8:timestamp) -> 13:boolean) -> 14:timestamp Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -542,8 +542,8 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [13, 0, 15] - selectExpressions: ConstantVectorExpression(val null) -> 13:bigint, VectorCoalesce(columns [14, 0])(children: ConstantVectorExpression(val null) -> 14:bigint, col 0:tinyint) -> 15:bigint + projectedOutputColumnNums: [13, 0, 0] + selectExpressions: ConstantVectorExpression(val null) -> 13:bigint Limit Vectorization: className: VectorLimitOperator native: true diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index ec6a78644f..f91843065a 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -66,18 +66,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 6] + selectExpressions: CastStringToLong(col 5:string)(children: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string) -> 6:int Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 6:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -208,13 +208,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + projectedOutputColumnNums: [5] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -299,18 +299,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - 
selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 6] + selectExpressions: CastStringToLong(col 5:string)(children: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string) -> 6:int Statistics: Num rows: 5 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 6:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -441,13 +441,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + projectedOutputColumnNums: [5] + selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 0d2590a3bb..bf52ccf721 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -119,13 +119,13 @@ STAGE PLANS: 1 Map 2 Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) + expressions: _col0 (type: bigint), CASE WHEN (CASE WHEN (_col2 is not null) THEN ((_col2 > 1L)) ELSE (true) END) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: IfExprColumnNull(col 3:boolean, col 2:bigint, null)(children: LongColGreaterLongScalar(col 4:bigint, val 1)(children: VectorCoalesce(columns [2, 3])(children: col 2:bigint, ConstantVectorExpression(val 5) -> 3:bigint) -> 4:bigint) -> 3:boolean, col 2:bigint) -> 4:bigint + projectedOutputColumnNums: [0, 7] + selectExpressions: IfExprColumnNull(col 6:boolean, col 2:bigint, null)(children: IfExprCondExprColumn(col 3:boolean, col 4:boolean, col 5:boolean)(children: IsNotNull(col 2:bigint) -> 3:boolean, LongColGreaterLongScalar(col 2:bigint, val 1) -> 4:boolean, ConstantVectorExpression(val 1) -> 5:boolean) -> 6:boolean, col 2:bigint) -> 7:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -153,7 +153,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: member:bigint partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, bigint] + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint] Map 2 Map Operator 
Tree: TableScan diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out index 185f438e14..6a7e76e569 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out @@ -61,13 +61,13 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] Select Operator - expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int) + expressions: CASE WHEN (a is not null) THEN (a) ELSE (b) END (type: int), a (type: int), b (type: int) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 0, 1] - selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int + projectedOutputColumnNums: [4, 0, 1] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 0:int, col 1:int)(children: IsNotNull(col 0:int) -> 3:boolean) -> 4:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) @@ -77,7 +77,7 @@ STAGE PLANS: keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 3:int + valueColumns: 4:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -96,7 +96,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:int, b:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/perf/spark/query40.q.out ql/src/test/results/clientpositive/perf/spark/query40.q.out index a2d65130f5..8508af59d4 100644 --- ql/src/test/results/clientpositive/perf/spark/query40.q.out +++ ql/src/test/results/clientpositive/perf/spark/query40.q.out @@ -241,7 +241,7 @@ STAGE PLANS: 1 Map 9 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= DATE'1998-04-08')) THEN ((_col4 - COALESCE(_col7,0))) ELSE (0) END (type: decimal(13,2)) + expressions: _col14 (type: string), _col11 (type: string), CASE WHEN ((CAST( _col9 AS DATE) < DATE'1998-04-08')) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)), CASE WHEN ((CAST( _col9 AS DATE) >= DATE'1998-04-08')) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query49.q.out ql/src/test/results/clientpositive/perf/spark/query49.q.out index 18fbb17478..07d14b508c 100644 --- ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ 
ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -460,7 +460,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col11, _col12 Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2)) + expressions: _col1 (type: int), CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(12,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -615,7 +615,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col11, _col12 Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2)) + expressions: _col1 (type: int), CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(12,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -739,7 +739,7 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col11, _col12 Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), COALESCE(_col11,0) (type: int), COALESCE(_col3,0) (type: int), COALESCE(_col12,0) (type: decimal(12,2)), COALESCE(_col4,0) (type: decimal(12,2)) + expressions: _col1 (type: int), CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(12,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query67.q.out ql/src/test/results/clientpositive/perf/spark/query67.q.out index 2a7718b7cb..44da5c74d1 100644 --- ql/src/test/results/clientpositive/perf/spark/query67.q.out +++ ql/src/test/results/clientpositive/perf/spark/query67.q.out @@ -234,7 +234,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col11, _col13, _col14, _col15, _col16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col14 (type: string), _col13 (type: string), _col16 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col11 (type: string), COALESCE((_col4 * CAST( _col3 AS decimal(10,0))),0) (type: decimal(18,2)) + expressions: _col15 (type: string), _col14 (type: string), _col13 (type: 
string), _col16 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col11 (type: string), CASE WHEN ((_col4 is not null and _col3 is not null)) THEN ((_col4 * CAST( _col3 AS decimal(10,0)))) ELSE (0) END (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query75.q.out ql/src/test/results/clientpositive/perf/spark/query75.q.out index 70b3280b43..85e6dcac13 100644 --- ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -546,7 +546,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -615,7 +615,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -668,7 +668,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -775,7 +775,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), 
_col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -796,7 +796,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -849,7 +849,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - COALESCE(_col15,0)) (type: int), (_col4 - COALESCE(_col16,0)) (type: decimal(8,2)) + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query77.q.out ql/src/test/results/clientpositive/perf/spark/query77.q.out index 4b457b33bb..1ae664d8c6 100644 --- ql/src/test/results/clientpositive/perf/spark/query77.q.out +++ ql/src/test/results/clientpositive/perf/spark/query77.q.out @@ -820,7 +820,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4, _col5 Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'web channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), COALESCE(_col4,0) (type: decimal(17,2)), (_col2 - COALESCE(_col5,0)) (type: decimal(18,2)) + expressions: 'web channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(17,2)), (_col2 - CASE WHEN (_col5 is not null) THEN (_col5) ELSE (0) END) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -908,7 +908,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Select 
Operator - expressions: 'store channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), COALESCE(_col4,0) (type: decimal(17,2)), (_col2 - COALESCE(_col5,0)) (type: decimal(18,2)) + expressions: 'store channel' (type: string), _col0 (type: int), _col1 (type: decimal(17,2)), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (0) END (type: decimal(17,2)), (_col2 - CASE WHEN (_col5 is not null) THEN (_col5) ELSE (0) END) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query78.q.out ql/src/test/results/clientpositive/perf/spark/query78.q.out index 190664791f..03f7a6d92c 100644 --- ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -145,7 +145,7 @@ STAGE PLANS: Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 177), Map 21 (PARTITION-LEVEL SORT, 177) Reducer 3 <- Reducer 2 (GROUP, 241) Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 167), Reducer 3 (PARTITION-LEVEL SORT, 167) - Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 91), Reducer 4 (PARTITION-LEVEL SORT, 91) + Reducer 5 <- Reducer 18 (PARTITION-LEVEL SORT, 113), Reducer 4 (PARTITION-LEVEL SORT, 113) Reducer 6 <- Reducer 5 (SORT, 1) Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 432), Map 9 (PARTITION-LEVEL SORT, 432) #### A masked pattern was here #### @@ -473,13 +473,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9 Statistics: Num rows: 191662559 Data size: 16908526668 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col7,0) > 0) (type: boolean) - Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col7 is not null) THEN ((_col7 > 0L)) ELSE (false) END (type: boolean) + Statistics: Num rows: 95831279 Data size: 8454263289 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95831279 Data size: 8454263289 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: bigint), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: @@ -490,18 +490,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col11, _col12, _col13 - Statistics: Num rows: 70276272 Data size: 6199793181 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 105414409 Data size: 9299689819 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (COALESCE(_col11,0) > 0) (type: boolean) - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col11 is not null) THEN ((_col11 > 0L)) ELSE (false) END (type: boolean) + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), (COALESCE(_col7,0) + COALESCE(_col11,0)) (type: bigint), (COALESCE(_col8,0) + COALESCE(_col12,0)) (type: decimal(18,2)), (COALESCE(_col9,0) + 
COALESCE(_col13,0)) (type: decimal(18,2)), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), round((UDFToDouble(_col2) / UDFToDouble(COALESCE((_col7 + _col11),1))), 2) (type: double) + expressions: _col0 (type: int), _col1 (type: int), (CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END + CASE WHEN (_col11 is not null) THEN (_col11) ELSE (0) END) (type: bigint), (CASE WHEN (_col8 is not null) THEN (_col8) ELSE (0) END + CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END) (type: decimal(18,2)), (CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END + CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: decimal(18,2)), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), round((UDFToDouble(_col2) / UDFToDouble(CASE WHEN ((_col7 is not null and _col11 is not null)) THEN ((_col7 + _col11)) ELSE (1) END)), 2) (type: double) outputColumnNames: _col0, _col1, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col9 (type: bigint), _col10 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(18,2)), _col8 (type: decimal(18,2)), _col12 (type: double) sort order: ++---++++ - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 6 Execution mode: vectorized @@ -509,7 +509,7 @@ STAGE PLANS: Select Operator expressions: 2000 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: bigint), KEY.reducesinkkey6 (type: decimal(18,2)), KEY.reducesinkkey7 (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 52707204 Data size: 4649844865 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/perf/spark/query80.q.out ql/src/test/results/clientpositive/perf/spark/query80.q.out index ac9d946723..6c5226c5f2 100644 --- ql/src/test/results/clientpositive/perf/spark/query80.q.out +++ ql/src/test/results/clientpositive/perf/spark/query80.q.out @@ -677,7 +677,7 @@ STAGE PLANS: outputColumnNames: _col5, _col6, _col9, _col10, _col18 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column 
stats: NONE Group By Operator @@ -807,7 +807,7 @@ STAGE PLANS: 1 Map 30 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -881,7 +881,7 @@ STAGE PLANS: 1 Map 11 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: string), _col5 (type: decimal(7,2)), COALESCE(_col9,0) (type: decimal(12,2)), (_col6 - COALESCE(_col10,0)) (type: decimal(13,2)) + expressions: _col18 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(12,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(13,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git ql/src/test/results/clientpositive/perf/tez/query78.q.out ql/src/test/results/clientpositive/perf/tez/query78.q.out index e263d6403f..a8fee3bbe4 100644 --- ql/src/test/results/clientpositive/perf/tez/query78.q.out +++ ql/src/test/results/clientpositive/perf/tez/query78.q.out @@ -158,15 +158,15 @@ Stage-0 File Output Operator [FS_238] Limit [LIM_237] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_236] (rows=23425424 width=88) + Select Operator [SEL_236] (rows=52707204 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_73] - Select Operator [SEL_72] (rows=23425424 width=88) + Select Operator [SEL_72] (rows=52707204 width=88) Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_71] (rows=23425424 width=88) - predicate:(COALESCE(_col11,0) > 0) - Merge Join Operator [MERGEJOIN_191] (rows=70276272 width=88) + Filter Operator [FIL_71] (rows=52707204 width=88) + predicate:CASE WHEN (_col11 is not null) THEN ((_col11 > 0L)) ELSE (false) END + Merge Join Operator [MERGEJOIN_191] (rows=105414409 width=88) Conds:RS_68._col1=RS_235._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] <-Reducer 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_235] @@ -230,8 +230,8 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col1 - Filter Operator [FIL_45] (rows=63887519 width=88) - predicate:(COALESCE(_col7,0) > 0) + Filter Operator [FIL_45] (rows=95831279 width=88) + predicate:CASE WHEN (_col7 is not null) THEN ((_col7 > 0L)) ELSE (false) END Merge Join Operator [MERGEJOIN_190] (rows=191662559 width=88) Conds:RS_215._col1, _col0=RS_225._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/semijoin4.q.out ql/src/test/results/clientpositive/semijoin4.q.out index f387d40f4d..72b75adf57 100644 --- 
ql/src/test/results/clientpositive/semijoin4.q.out +++ ql/src/test/results/clientpositive/semijoin4.q.out @@ -63,11 +63,10 @@ POSTHOOK: Input: default@table_18 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-5 is a root stage - Stage-6 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,15 +80,14 @@ STAGE PLANS: predicate: ((tinyint_col_46 = -92Y) and bigint_col_13 is not null and decimal1309_col_65 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: bigint_col_13 (type: bigint), smallint_col_24 (type: smallint), double_col_60 (type: double), decimal1309_col_65 (type: decimal(13,9)) - outputColumnNames: _col0, _col1, _col3, _col4 + expressions: bigint_col_13 (type: bigint), decimal1309_col_65 (type: decimal(13,9)) + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: decimal(27,9)), _col0 (type: bigint) + key expressions: _col2 (type: decimal(27,9)), _col0 (type: bigint) sort order: ++ - Map-reduce partition columns: _col4 (type: decimal(27,9)), _col0 (type: bigint) + Map-reduce partition columns: _col2 (type: decimal(27,9)), _col0 (type: bigint) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) TableScan alias: t2 filterExpr: ((tinyint_col_21 = -92Y) and tinyint_col_18 is not null and decimal2709_col_9 is not null) (type: boolean) @@ -111,9 +109,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: decimal(27,9)), _col0 (type: bigint) + 0 _col2 (type: decimal(27,9)), _col0 (type: bigint) 1 _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col1, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -129,7 +126,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) TableScan Reduce Output Operator sort order: @@ -141,65 +137,20 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col3 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: (UDFToShort(UDFToByte(-92)) + _col1) (type: smallint), floor(_col3) (type: bigint) - sort order: +- - Map-reduce partition columns: (UDFToShort(UDFToByte(-92)) + _col1) (type: smallint) - Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: smallint), _col3 (type: double) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: smallint), VALUE._col3 (type: double) - outputColumnNames: _col1, _col3 - Statistics: 
Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col1: smallint, _col3: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: (UDFToShort(UDFToByte(-92)) + _col1) ASC NULLS LAST, floor(_col3) DESC NULLS LAST
- partition by: (UDFToShort(UDFToByte(-92)) + _col1)
- raw input shape:
- window functions:
- window function definition
- alias: LEAD_window_0
- arguments: -973
- name: LEAD
- window function: GenericUDAFLeadEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
+ Select Operator
+ expressions: 498 (type: int)
+ outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: COALESCE(498,LEAD_window_0,524) (type: int)
- outputColumnNames: _col0
+ File Output Operator
+ compressed: false
 Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

- Stage: Stage-5
+ Stage: Stage-4
 Map Reduce
 Map Operator Tree:
 TableScan
@@ -254,7 +205,7 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

- Stage: Stage-6
+ Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
diff --git ql/src/test/results/clientpositive/semijoin5.q.out ql/src/test/results/clientpositive/semijoin5.q.out
index 0b466500cc..c047c6d265 100644
--- ql/src/test/results/clientpositive/semijoin5.q.out
+++ ql/src/test/results/clientpositive/semijoin5.q.out
@@ -54,11 +54,10 @@ POSTHOOK: Input: default@table_1_n1
 #### A masked pattern was here ####
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-6
+ Stage-2 depends on stages: Stage-1, Stage-5
 Stage-3 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-3
- Stage-6 is a root stage
- Stage-0 depends on stages: Stage-4
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3

 STAGE PLANS:
 Stage: Stage-1
@@ -152,7 +151,7 @@ STAGE PLANS:
 TableScan
 Reduce Output Operator
 key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint)
- sort order: +-
+ sort order: --
 Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int)
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
 value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
@@ -171,66 +170,19 @@ STAGE PLANS:
 Windowing table definition
 input alias: ptf_1
 name: windowingtablefunction
- order by: (_col7 + UDFToInteger(_col5)) ASC NULLS LAST, floor(_col3) DESC NULLS LAST
+ order by: (_col7 + UDFToInteger(_col5)) DESC NULLS LAST, floor(_col3) DESC NULLS LAST
 partition by: (_col7 + UDFToInteger(_col5))
 raw input shape:
 window functions:
 window function definition
- alias: LEAD_window_0
- arguments: -973
- name: LEAD
- window function: GenericUDAFLeadEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
- outputColumnNames: LEAD_window_0, _col3, _col5, _col7
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint)
- sort order: --
- Map-reduce partition columns: (_col7 + UDFToInteger(_col5)) (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: LEAD_window_0 (type: int), _col3 (type: double), _col5 (type: smallint), _col7 (type: int)
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col4 (type: double), VALUE._col6 (type: smallint), VALUE._col8 (type: int)
- outputColumnNames: _col0, _col4, _col6, _col8
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: int, _col4: double, _col6: smallint, _col8: int
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: (_col8 + UDFToInteger(_col6)) DESC NULLS LAST, floor(_col4) DESC NULLS LAST
- partition by: (_col8 + UDFToInteger(_col6))
- raw input shape:
- window functions:
- window function definition
- alias: sum_window_1
+ alias: sum_window_0
 arguments: 62
 name: sum
 window function: GenericUDAFSumLong
 window frame: ROWS PRECEDING(MAX)~FOLLOWING(48)
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
 Select Operator
- expressions: COALESCE(498,_col0,524) (type: int), (_col8 + UDFToInteger(_col6)) (type: int), floor(_col4) (type: bigint), COALESCE(sum_window_1,704) (type: bigint)
+ expressions: 498 (type: int), (_col7 + UDFToInteger(_col5)) (type: int), floor(_col3) (type: bigint), CASE WHEN (sum_window_0 is not null) THEN (sum_window_0) ELSE (704) END (type: bigint)
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
 File Output Operator
@@ -241,7 +193,7 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

- Stage: Stage-6
+ Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
diff --git ql/src/test/results/clientpositive/vector_coalesce.q.out ql/src/test/results/clientpositive/vector_coalesce.q.out
index 9f17983da6..3d0c00bbfb 100644
--- ql/src/test/results/clientpositive/vector_coalesce.q.out
+++ ql/src/test/results/clientpositive/vector_coalesce.q.out
@@ -35,8 +35,8 @@ STAGE PLANS:
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [6, 2, 4, 1, 17]
- selectExpressions: VectorCoalesce(columns [13, 6, 14, 15, 16])(children: ConstantVectorExpression(val null) -> 13:string, col 6:string, CastLongToString(col 2:int) -> 14:string, CastFloatToString(col 4:float) -> 15:string, CastLongToString(col 1:smallint) -> 16:string) -> 17:string
+ projectedOutputColumnNums: [6, 2, 4, 1, 21]
+ selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 6:stringcol 20:string)(children: IsNotNull(col 6:string) -> 13:boolean, col 6:string, IfExprCondExprCondExpr(col 14:boolean, col 15:stringcol 19:string)(children: IsNotNull(col 2:int) -> 14:boolean, CastLongToString(col 2:int) -> 15:string, IfExprCondExprCondExpr(col 16:boolean, col 17:stringcol 18:string)(children: IsNotNull(col 4:float) -> 16:boolean, CastFloatToString(col 4:float) -> 17:string, CastLongToString(col 1:smallint) -> 18:string) -> 19:string) -> 20:string) -> 21:string
 Reduce Sink Vectorization:
 className: VectorReduceSinkOperator
 native: false
@@ -124,8 +124,8 @@ STAGE PLANS:
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [5, 2, 16]
- selectExpressions: VectorCoalesce(columns [13, 15, 14])(children: ConstantVectorExpression(val null) -> 13:double, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 15:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 16:double
+ projectedOutputColumnNums: [5, 2, 18]
+ selectExpressions: IfExprCondExprCondExpr(col 16:boolean, col 17:doublecol 14:double)(children: ColAndCol(col 13:boolean, col 15:boolean)(children: IsNotNull(col 5:double) -> 13:boolean, IsNotNull(col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 15:boolean) -> 16:boolean, DoubleColAddDoubleColumn(col 5:double, col 14:double)(children: FuncLog2LongToDouble(col 2:int) -> 14:double) -> 17:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 18:double
 Reduce Sink Vectorization:
 className: VectorReduceSinkOperator
 native: false
@@ -298,8 +298,8 @@ STAGE PLANS:
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [8, 9, 13]
- selectExpressions: VectorCoalesce(columns [8, 9])(children: col 8:timestamp, col 9:timestamp) -> 13:timestamp
+ projectedOutputColumnNums: [8, 9, 14]
+ selectExpressions: IfExprTimestampColumnColumn(col 13:boolean, col 8:timestampcol 9:timestamp)(children: IsNotNull(col 8:timestamp) -> 13:boolean) -> 14:timestamp
 Reduce Sink Vectorization:
 className: VectorReduceSinkOperator
 native: false
@@ -470,8 +470,8 @@ STAGE PLANS:
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [13, 0, 15]
- selectExpressions: ConstantVectorExpression(val null) -> 13:bigint, VectorCoalesce(columns [14, 0])(children: ConstantVectorExpression(val null) -> 14:bigint, col 0:tinyint) -> 15:bigint
+ projectedOutputColumnNums: [13, 0, 0]
+ selectExpressions: ConstantVectorExpression(val null) -> 13:bigint
 Limit Vectorization:
 className: VectorLimitOperator
 native: true
diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out
index ed1a0762b0..88f4057424 100644
--- ql/src/test/results/clientpositive/vector_coalesce_2.q.out
+++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out
@@ -60,18 +60,18 @@ STAGE PLANS:
 TableScan Vectorization:
 native: true
 Select Operator
- expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int)
+ expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int)
 outputColumnNames: _col0, _col1
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [1, 5]
- selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int
+ projectedOutputColumnNums: [1, 6]
+ selectExpressions: CastStringToLong(col 5:string)(children: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string) -> 6:int
 Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: sum(_col1)
 Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 5:int) -> bigint
+ aggregators: VectorUDAFSumLong(col 6:int) -> bigint
 className: VectorGroupByOperator
 groupByMode: HASH
 keyExpressions: col 1:string
@@ -179,13 +179,13 @@ STAGE PLANS:
 TableScan Vectorization:
 native: true
 Select Operator
- expressions: COALESCE(str1,0) (type: string)
+ expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string)
 outputColumnNames: _col0
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [4]
- selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string
 Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
@@ -263,18 +263,18 @@ STAGE PLANS:
 TableScan Vectorization:
 native: true
 Select Operator
- expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int)
+ expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int)
 outputColumnNames: _col0, _col1
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [1, 5]
- selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int
+ projectedOutputColumnNums: [1, 6]
+ selectExpressions: CastStringToLong(col 5:string)(children: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string) -> 6:int
 Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: sum(_col1)
 Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 5:int) -> bigint
+ aggregators: VectorUDAFSumLong(col 6:int) -> bigint
 className: VectorGroupByOperator
 groupByMode: HASH
 keyExpressions: col 1:string
@@ -382,13 +382,13 @@ STAGE PLANS:
 TableScan Vectorization:
 native: true
 Select Operator
- expressions: COALESCE(str1,0) (type: string)
+ expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string)
 outputColumnNames: _col0
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [4]
- selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprColumnCondExpr(col 3:boolean, col 0:stringcol 4:string)(children: IsNotNull(col 0:string) -> 3:boolean, col 0:string, ConstantVectorExpression(val 0) -> 4:string) -> 5:string
 Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
diff --git ql/src/test/results/clientpositive/vector_coalesce_3.q.out ql/src/test/results/clientpositive/vector_coalesce_3.q.out
index 4b40d93f1c..30cfced506 100644
--- ql/src/test/results/clientpositive/vector_coalesce_3.q.out
+++ ql/src/test/results/clientpositive/vector_coalesce_3.q.out
@@ -129,13 +129,13 @@ STAGE PLANS:
 outputColumnNames: _col0, _col2
 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint)
+ expressions: _col0 (type: bigint), CASE WHEN (CASE WHEN (_col2 is not null) THEN ((_col2 > 1L)) ELSE (true) END) THEN (_col2) ELSE (null) END (type: bigint)
 outputColumnNames: _col0, _col1
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [0, 3]
- selectExpressions: IfExprColumnNull(col 2:boolean, col 1:bigint, null)(children: LongColGreaterLongScalar(col 3:bigint, val 1)(children: VectorCoalesce(columns [1, 2])(children: col 1:bigint, ConstantVectorExpression(val 5) -> 2:bigint) -> 3:bigint) -> 2:boolean, col 1:bigint) -> 3:bigint
+ projectedOutputColumnNums: [0, 6]
+ selectExpressions: IfExprColumnNull(col 5:boolean, col 1:bigint, null)(children: IfExprCondExprColumn(col 2:boolean, col 3:boolean, col 4:boolean)(children: IsNotNull(col 1:bigint) -> 2:boolean, LongColGreaterLongScalar(col 1:bigint, val 1) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:boolean) -> 5:boolean, col 1:bigint) -> 6:bigint
 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
diff --git ql/src/test/results/clientpositive/vector_coalesce_4.q.out ql/src/test/results/clientpositive/vector_coalesce_4.q.out
index a3be699431..89aa21b600 100644
--- ql/src/test/results/clientpositive/vector_coalesce_4.q.out
+++ ql/src/test/results/clientpositive/vector_coalesce_4.q.out
@@ -55,13 +55,13 @@ STAGE PLANS:
 native: true
 vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct]
 Select Operator
- expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int)
+ expressions: CASE WHEN (a is not null) THEN (a) ELSE (b) END (type: int), a (type: int), b (type: int)
 outputColumnNames: _col0, _col1, _col2
 Select Vectorization:
 className: VectorSelectOperator
 native: true
- projectedOutputColumnNums: [3, 0, 1]
- selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int
+ projectedOutputColumnNums: [4, 0, 1]
+ selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 0:int, col 1:int)(children: IsNotNull(col 0:int) -> 3:boolean) -> 4:int
 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col1 (type: int), _col2 (type: int)
@@ -88,7 +88,7 @@ STAGE PLANS:
 includeColumns: [0, 1]
 dataColumns: a:int, b:int
 partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
 Reduce Vectorization:
 enabled: false
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true