diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 718a2d0bd0..76650267b5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -722,17 +722,22 @@ public ASTNode visitCall(RexCall call) { } break; case CAST: - HiveToken ht = TypeConverter.hiveToken(call.getType()); - ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text); - if (ht.args != null) { - for (String castArg : ht.args) { - astBldr.add(HiveParser.Identifier, castArg); - } - } - astNodeLst.add(astBldr.node()); - for (RexNode operand : call.operands) { - astNodeLst.add(operand.accept(this)); + assert(call.getOperands().size() == 1); + if(call.getType().isStruct()) { + // a cast to a struct type can be ignored safely because explicit casting to struct + // types is not possible; an implicit cast, e.g. 
CAST(ROW__ID as <...>) can be ignored + return call.getOperands().get(0).accept(this); } + + HiveToken ht = TypeConverter.hiveToken(call.getType()); + ASTBuilder astBldr = ASTBuilder.construct(ht.type, ht.text); + if (ht.args != null) { + for (String castArg : ht.args) { + astBldr.add(HiveParser.Identifier, castArg); + } + } + astNodeLst.add(astBldr.node()); + astNodeLst.add(call.getOperands().get(0).accept(this)); break; case EXTRACT: // Extract on date: special handling since function in Hive does diff --git a/ql/src/test/queries/clientpositive/npe.q b/ql/src/test/queries/clientpositive/npe.q new file mode 100644 index 0000000000..06c9149ee3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/npe.q @@ -0,0 +1,50 @@ +CREATE TABLE table_16 ( +timestamp_col_19 timestamp, +timestamp_col_29 timestamp, +int_col_27 int, +int_col_39 int, +boolean_col_18 boolean, +varchar0045_col_23 varchar(45) +); + + +CREATE TABLE table_7 ( +int_col_10 int, +bigint_col_3 bigint +); + +CREATE TABLE table_10 ( +boolean_col_8 boolean, +boolean_col_16 boolean, +timestamp_col_5 timestamp, +timestamp_col_15 timestamp, +timestamp_col_30 timestamp, +decimal3825_col_26 decimal(38, 25), +smallint_col_9 smallint, +int_col_18 int +); + +explain cbo +SELECT + DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16, a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col +FROM table_7 a3 +RIGHT JOIN table_10 a4 +WHERE (a3.bigint_col_3) >= (a4.int_col_18) +INTERSECT ALL +SELECT + COALESCE(LEAST( + COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP)) + ), + GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS TIMESTAMP))) + ) AS timestamp_col +FROM table_16 a1 + GROUP BY COALESCE(LEAST( + COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2014-08-16 
00:00:00' AS TIMESTAMP)) + ), + GREATEST( + COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS TIMESTAMP))) + ); diff --git a/ql/src/test/results/clientpositive/llap/npe.q.out b/ql/src/test/results/clientpositive/llap/npe.q.out new file mode 100644 index 0000000000..fd3e5a6dbf --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/npe.q.out @@ -0,0 +1,148 @@ +PREHOOK: query: CREATE TABLE table_16 ( +timestamp_col_19 timestamp, +timestamp_col_29 timestamp, +int_col_27 int, +int_col_39 int, +boolean_col_18 boolean, +varchar0045_col_23 varchar(45) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_16 +POSTHOOK: query: CREATE TABLE table_16 ( +timestamp_col_19 timestamp, +timestamp_col_29 timestamp, +int_col_27 int, +int_col_39 int, +boolean_col_18 boolean, +varchar0045_col_23 varchar(45) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_16 +PREHOOK: query: CREATE TABLE table_7 ( +int_col_10 int, +bigint_col_3 bigint +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_7 +POSTHOOK: query: CREATE TABLE table_7 ( +int_col_10 int, +bigint_col_3 bigint +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_7 +PREHOOK: query: CREATE TABLE table_10 ( +boolean_col_8 boolean, +boolean_col_16 boolean, +timestamp_col_5 timestamp, +timestamp_col_15 timestamp, +timestamp_col_30 timestamp, +decimal3825_col_26 decimal(38, 25), +smallint_col_9 smallint, +int_col_18 int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_10 +POSTHOOK: query: CREATE TABLE table_10 ( +boolean_col_8 boolean, +boolean_col_16 boolean, +timestamp_col_5 timestamp, +timestamp_col_15 timestamp, +timestamp_col_30 timestamp, +decimal3825_col_26 decimal(38, 25), +smallint_col_9 smallint, 
+int_col_18 int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_10 +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo +SELECT + DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16, a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col +FROM table_7 a3 +RIGHT JOIN table_10 a4 +WHERE (a3.bigint_col_3) >= (a4.int_col_18) +INTERSECT ALL +SELECT + COALESCE(LEAST( + COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP)) + ), + GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS TIMESTAMP))) + ) AS timestamp_col +FROM table_16 a1 + GROUP BY COALESCE(LEAST( + COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP)) + ), + GREATEST( + COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS TIMESTAMP))) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@table_10 +PREHOOK: Input: default@table_16 +PREHOOK: Input: default@table_7 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +SELECT + DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16, a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col +FROM table_7 a3 +RIGHT JOIN table_10 a4 +WHERE (a3.bigint_col_3) >= (a4.int_col_18) +INTERSECT ALL +SELECT + COALESCE(LEAST( + COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP)) + ), + GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS TIMESTAMP))) + ) AS timestamp_col +FROM 
table_16 a1 + GROUP BY COALESCE(LEAST( + COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS TIMESTAMP)) + ), + GREATEST( + COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS TIMESTAMP)), + COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS TIMESTAMP))) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_10 +POSTHOOK: Input: default@table_16 +POSTHOOK: Input: default@table_7 +#### A masked pattern was here #### +CBO PLAN: +HiveProject($f0=[$1]) + HiveTableFunctionScan(invocation=[replicate_rows($0, $1)], rowType=[RecordType(BIGINT $f0, TIMESTAMP(9) $f1)]) + HiveProject($f0=[$2], $f1=[$0]) + HiveFilter(condition=[=($1, 2)]) + HiveAggregate(group=[{0}], agg#0=[count($1)], agg#1=[min($1)]) + HiveProject($f0=[$0], $f1=[$1]) + HiveUnion(all=[true]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[CASE(IS NOT NULL($8), $8, if($6, $9, $7))]) + HiveJoin(condition=[>=($1, $16)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(int_col_10=[$0], bigint_col_3=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, table_7]], table:alias=[a3]) + HiveProject(boolean_col_8=[$0], boolean_col_16=[$1], timestamp_col_5=[$2], timestamp_col_15=[$3], timestamp_col_30=[$4], decimal3825_col_26=[$5], smallint_col_9=[$6], int_col_18=[$7], BLOCK__OFFSET__INSIDE__FILE=[$8], INPUT__FILE__NAME=[$9], ROW__ID=[$10], CAST=[CAST($7):BIGINT]) + HiveFilter(condition=[IS NOT NULL(CAST($7):BIGINT)]) + HiveTableScan(table=[[default, table_10]], table:alias=[a4]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + 
HiveProject($f0=[CASE(IS NOT NULL(least(CASE(IS NOT NULL($0), $0, 2010-03-29 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2014-08-16 00:00:00:TIMESTAMP(9)))), least(CASE(IS NOT NULL($0), $0, 2010-03-29 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2014-08-16 00:00:00:TIMESTAMP(9))), greatest(CASE(IS NOT NULL($0), $0, 2013-07-01 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2028-06-18 00:00:00:TIMESTAMP(9))))]) + HiveTableScan(table=[[default, table_16]], table:alias=[a1]) +