From 55cd1a5d640d694e874733d23218cc3b97101026 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Fri, 29 Jun 2018 23:13:07 -0700 Subject: [PATCH] HIVE-20013 : Add an Implicit cast to date type for to_date function (Nishant Bangarwa via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan --- .../calcite/translator/RexNodeConverter.java | 19 ++++++ .../queries/clientpositive/druidmini_expressions.q | 22 ++++++ .../druid/druidmini_expressions.q.out | 79 ++++++++++++++++++++++ .../clientpositive/fold_eq_with_case_when.q.out | 4 +- ql/src/test/results/clientpositive/llap/kryo.q.out | 2 +- 5 files changed, 123 insertions(+), 3 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 2ae015adf4..576a2a80c0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -346,6 +347,8 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { calciteOp = SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=") .getGenericUDF(), argTypeBldr.build(), retType); + } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { + childRexNodeLst = rewriteToDateChildren(childRexNodeLst); } expr = cluster.getRexBuilder().makeCall(retType, 
calciteOp, childRexNodeLst); } else { @@ -525,6 +528,22 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } + + private List rewriteToDateChildren(List childRexNodeLst) { + List newChildRexNodeLst = new ArrayList(); + assert childRexNodeLst.size() == 1; + RexNode child = childRexNodeLst.get(0); + if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval( + child.getType())) { + newChildRexNodeLst.add(child); + } else { + newChildRexNodeLst.add( + cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP), + child)); + } + return newChildRexNodeLst; + } + private static boolean checkForStatefulFunctions(List list) { for (ExprNodeDesc node : list) { if (node instanceof ExprNodeGenericFuncDesc) { diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q index fad8f73520..764e115a20 100644 --- a/ql/src/test/queries/clientpositive/druidmini_expressions.q +++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -117,6 +117,7 @@ EXPLAIN SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble FROM `default`.`druid_table_n0` `druid_table_alias` GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE); + SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble`)) AS `sum_calculation_4998925219892510720_ok`, CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE) AS `tmn___time_ok` FROM `default`.`druid_table_n0` `druid_table_alias` @@ -135,3 +136,24 @@ SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1 EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2; DROP TABLE druid_table_n0; + +-- Tests for testing handling of date/time functions on druid dimensions stored as strings +CREATE TABLE druid_table_n1 +STORED BY 
'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1); + +EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; + +SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index fd77a915d9..9df0100e2b 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -1347,3 +1347,82 @@ POSTHOOK: query: DROP TABLE druid_table_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: default@druid_table_n0 +PREHOOK: query: CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1) +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_n1 +POSTHOOK: query: CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table_n1 +POSTHOOK: Lineage: druid_table_n1.__time SIMPLE [] +POSTHOOK: Lineage: druid_table_n1.date1 SCRIPT [] +POSTHOOK: Lineage: druid_table_n1.datetime1 SCRIPT [] +POSTHOOK: Lineage: druid_table_n1.time1 SCRIPT [] +PREHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_n1 + properties: + druid.fieldNames vc,vc0 + druid.fieldTypes date,date + druid.query.json 
{"queryType":"scan","dataSource":"default.druid_table_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(\"date1\",'','US/Pacific'),'P1D','','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_floor(timestamp_parse(\"datetime1\",'','US/Pacific'),'P1D','','US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: date), vc0 (type: date) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 1969-12-31 +2004-04-04 2004-04-04 +2004-04-09 2004-04-09 +2004-04-11 2004-04-11 +2004-04-12 2004-04-12 diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index d06fb60345..fdb41c42c9 100644 --- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -40,10 +40,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: lineitem - filterExpr: ((DATE'1996-03-30' = to_date(l_shipdate)) and (l_shipmode = 'RAIL')) (type: boolean) + filterExpr: ((DATE'1996-03-30' = to_date(CAST( l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((DATE'1996-03-30' = to_date(l_shipdate)) and (l_shipmode = 'RAIL')) (type: boolean) + predicate: ((DATE'1996-03-30' = to_date(CAST( 
l_shipdate AS TIMESTAMP))) and (l_shipmode = 'RAIL')) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), (UDFToDouble(l_partkey) / 1000000.0D) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/kryo.q.out b/ql/src/test/results/clientpositive/llap/kryo.q.out index 234bae89c7..a291debf46 100644 --- a/ql/src/test/results/clientpositive/llap/kryo.q.out +++ b/ql/src/test/results/clientpositive/llap/kryo.q.out @@ -50,7 +50,7 @@ STAGE PLANS: predicate: ((id = 1) or (id = 2) or (id = 3) or (id = 4) or (id = 5) or (id = 6)) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(datetime) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(datetime))) THEN (date_) ELSE (to_date(datetime)) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string) + expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(CAST( datetime AS TIMESTAMP)) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(CAST( datetime AS TIMESTAMP)))) THEN (date_) ELSE (to_date(CAST( datetime AS TIMESTAMP))) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or 
datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE Group By Operator -- 2.14.3 (Apple Git-98)