diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f60091bf9f..35aae6a6a0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -21,6 +21,7 @@ import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.avatica.util.TimeUnitRange; import org.apache.calcite.plan.RelOptCluster; @@ -110,6 +111,7 @@ import java.util.List; import java.util.Map; + public class RexNodeConverter { private static class InputCtx { @@ -354,6 +356,12 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); calciteOp = SqlStdOperatorTable.OR; } + } else if (calciteOp.getKind() == SqlKind.COALESCE && + childRexNodeLst.size() > 1 ) { + // Rewrite COALESCE as a CASE + // This allows it to be further reduced to OR, if possible + calciteOp = SqlStdOperatorTable.CASE; + childRexNodeLst = rewriteCoalesceChildren(func, childRexNodeLst); } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { childRexNodeLst = rewriteToDateChildren(childRexNodeLst); } @@ -537,7 +545,6 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } - private List rewriteToDateChildren(List childRexNodeLst) { List newChildRexNodeLst = new ArrayList(); assert childRexNodeLst.size() == 1; @@ -566,6 +573,25 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } + /** Builds the operand list of a CASE expression equivalent to COALESCE over {@code childRexNodeLst}: for each operand except the last it appends an IS_NOT_NULL call on that operand (the WHEN condition) followed by the operand itself (the THEN value), then appends the last operand as the ELSE value. Every non-final operand therefore appears twice in the result. A single-element input yields a one-element list containing just that operand; the visible caller only invokes this method when there is more than one operand. @param func the Hive function descriptor being converted (not read by this method) @param childRexNodeLst already-converted operands of the COALESCE call; asserted to be non-empty @return a new list laid out as condition, value, ..., condition, value, else-value */ private List rewriteCoalesceChildren( + ExprNodeGenericFuncDesc func, List childRexNodeLst) { 
+ final List convertedChildList = Lists.newArrayList(); + assert childRexNodeLst.size() > 0; + final RexBuilder rexBuilder = cluster.getRexBuilder(); + int i=0; /* declared outside the loop because it still indexes the last child after the loop exits */ + for (; i < childRexNodeLst.size()-1; ++i ) { + // WHEN child not null THEN child + final RexNode child = childRexNodeLst.get(i); + RexNode childCond = rexBuilder.makeCall( + SqlStdOperatorTable.IS_NOT_NULL, child); + convertedChildList.add(childCond); + convertedChildList.add(child); + } + // Add the last child as the ELSE element + convertedChildList.add(childRexNodeLst.get(i)); + return convertedChildList; + } + private static boolean checkForStatefulFunctions(List list) { for (ExprNodeDesc node : list) { if (node instanceof ExprNodeGenericFuncDesc) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 06c9617818..36e05c9bcd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -374,6 +374,7 @@ private static String getName(GenericUDF hiveUDF) { registerFunction("is not distinct from", SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, hToken(HiveParser.EQUAL_NS, "<=>")); registerFunction("when", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when")); + registerFunction("coalesce", SqlStdOperatorTable.COALESCE, hToken(HiveParser.Identifier, "coalesce")); // timebased registerFunction("year", HiveExtractDate.YEAR, hToken(HiveParser.Identifier, "year")); diff --git a/ql/src/test/results/clientpositive/vector_coalesce_2.q.out b/ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 918ac59c99..4f63e883a9 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ 
b/ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -56,18 +56,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 4:string)(children: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string) -> 3:int Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 3:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -97,7 +97,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reduce Vectorization: enabled: false @@ -171,13 +171,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + selectExpressions: 
VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -197,7 +197,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Stage: Stage-0 @@ -251,18 +251,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) + expressions: str2 (type: string), UDFToInteger(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END) (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 5] - selectExpressions: CastStringToLong(col 4:string)(children: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string) -> 5:int + projectedOutputColumnNums: [1, 3] + selectExpressions: CastStringToLong(col 4:string)(children: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string) -> 3:int Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint + aggregators: VectorUDAFSumLong(col 3:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:string @@ -292,7 +292,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reduce Vectorization: enabled: false @@ -366,13 +366,13 @@ STAGE PLANS: TableScan Vectorization: 
native: true Select Operator - expressions: COALESCE(str1,0) (type: string) + expressions: CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4] - selectExpressions: VectorCoalesce(columns [0, 3])(children: col 0:string, ConstantVectorExpression(val 0) -> 3:string) -> 4:string + selectExpressions: VectorUDFAdaptor(CASE WHEN (str1 is not null) THEN (str1) ELSE (0) END)(children: IsNotNull(col 0:string) -> 3:boolean) -> 4:string Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -392,7 +392,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/vector_coalesce_3.q.out index 884078d734..8a58c62816 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce_3.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce_3.q.out @@ -123,13 +123,13 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) + expressions: _col0 (type: bigint), CASE WHEN (CASE WHEN (_col2 is not null) THEN ((_col2 > 1L)) ELSE (true) END) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3] - selectExpressions: IfExprColumnNull(col 2:boolean, col 1:bigint, null)(children: LongColGreaterLongScalar(col 3:bigint, val 1)(children: VectorCoalesce(columns [1, 2])(children: col 1:bigint, 
ConstantVectorExpression(val 5) -> 2:bigint) -> 3:bigint) -> 2:boolean, col 1:bigint) -> 3:bigint + projectedOutputColumnNums: [0, 6] + selectExpressions: IfExprColumnNull(col 5:boolean, col 1:bigint, null)(children: IfExprCondExprColumn(col 2:boolean, col 3:boolean, col 4:boolean)(children: IsNotNull(col 1:bigint) -> 2:boolean, LongColGreaterLongScalar(col 1:bigint, val 1) -> 3:boolean, ConstantVectorExpression(val 1) -> 4:boolean) -> 5:boolean, col 1:bigint) -> 6:bigint Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out index 8cb5ffa545..649fb8a5f2 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce_4.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out @@ -51,13 +51,13 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct] Select Operator - expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int) + expressions: CASE WHEN (a is not null) THEN (a) ELSE (b) END (type: int), a (type: int), b (type: int) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 0, 1] - selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int + projectedOutputColumnNums: [4, 0, 1] + selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 0:int, col 1:int)(children: IsNotNull(col 0:int) -> 3:boolean) -> 4:int Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) @@ -84,7 +84,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:int, b:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [bigint, bigint] Reduce Vectorization: enabled: false 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true