From 817de370165982d5978a97f4e72b6f44b62428f2 Mon Sep 17 00:00:00 2001 From: Nishant Date: Thu, 28 Jun 2018 00:27:00 +0530 Subject: [PATCH] [HIVE-20013] Add implicit cast for TO_DATE function --- .../calcite/translator/RexNodeConverter.java | 19 ++++++ .../queries/clientpositive/druidmini_expressions.q | 22 ++++++ .../druid/druidmini_expressions.q.out | 79 ++++++++++++++++++++++ 3 files changed, 120 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 7cedab62fa..cbf0a3fd67 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -347,6 +348,8 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { calciteOp = SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=") .getGenericUDF(), argTypeBldr.build(), retType); + } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { + childRexNodeLst = rewriteToDateChildren(childRexNodeLst); } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { @@ -528,6 +531,22 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } + + private List<RexNode> rewriteToDateChildren(List<RexNode> childRexNodeLst) { + List<RexNode> 
newChildRexNodeLst = new ArrayList<>(); + assert childRexNodeLst.size() == 1; /* TO_DATE is expected to carry exactly one operand */ + RexNode child = childRexNodeLst.get(0); + if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval( + child.getType())) { + newChildRexNodeLst.add(child); + } else { /* any other operand type is implicitly cast to TIMESTAMP */ + newChildRexNodeLst.add( + cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP), + child)); + } + return newChildRexNodeLst; + } + private static boolean checkForStatefulFunctions(List list) { for (ExprNodeDesc node : list) { if (node instanceof ExprNodeGenericFuncDesc) { diff --git a/ql/src/test/queries/clientpositive/druidmini_expressions.q b/ql/src/test/queries/clientpositive/druidmini_expressions.q index fad8f73520..764e115a20 100644 --- a/ql/src/test/queries/clientpositive/druidmini_expressions.q +++ b/ql/src/test/queries/clientpositive/druidmini_expressions.q @@ -117,6 +117,7 @@ EXPLAIN SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble FROM `default`.`druid_table_n0` `druid_table_alias` GROUP BY CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE); + SELECT SUM((`druid_table_alias`.`cdouble` * `druid_table_alias`.`cdouble`)) AS `sum_calculation_4998925219892510720_ok`, CAST(TRUNC(CAST(`druid_table_alias`.`__time` AS TIMESTAMP),'MM') AS DATE) AS `tmn___time_ok` FROM `default`.`druid_table_n0` `druid_table_alias` @@ -135,3 +136,24 @@ SELECT DATE_ADD(cast(`__time` as date), CAST((cdouble / 1000) AS INT)) as date_1 EXPLAIN SELECT ctinyint > 2, count(*) from druid_table_n0 GROUP BY ctinyint > 2; DROP TABLE druid_table_n0; + +-- Tests for handling of date/time functions on druid dimensions stored as strings +CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, 
+cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1); + +EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; + +SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1; diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index fd77a915d9..9df0100e2b 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -1347,3 +1347,82 @@ POSTHOOK: query: DROP TABLE druid_table_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@druid_table_n0 POSTHOOK: Output: default@druid_table_n0 +PREHOOK: query: CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES ("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@druid_table_n1 +POSTHOOK: query: CREATE TABLE druid_table_n1 +STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler' +TBLPROPERTIES 
("druid.segment.granularity" = "HOUR", "druid.query.granularity" = "MINUTE") +AS + SELECT cast (current_timestamp() as timestamp with local time zone) as `__time`, +cast(datetime1 as string) as datetime1, +cast(date1 as string) as date1, +cast(time1 as string) as time1 +FROM TABLE ( +VALUES +('2004-04-09 22:20:14', '2004-04-09','22:20:14'), +('2004-04-04 22:50:16', '2004-04-04', '22:50:16'), +('2004-04-12 04:40:49', '2004-04-12', '04:40:49'), +('2004-04-11 00:00:00', '2004-04-11', null), +('00:00:00 18:58:41', null, '18:58:41')) as q (datetime1, date1, time1) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@druid_table_n1 +POSTHOOK: Lineage: druid_table_n1.__time SIMPLE [] +POSTHOOK: Lineage: druid_table_n1.date1 SCRIPT [] +POSTHOOK: Lineage: druid_table_n1.datetime1 SCRIPT [] +POSTHOOK: Lineage: druid_table_n1.time1 SCRIPT [] +PREHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: druid_table_n1 + properties: + druid.fieldNames vc,vc0 + druid.fieldTypes date,date + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_n1","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(\"date1\",'','US/Pacific'),'P1D','','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_floor(timestamp_parse(\"datetime1\",'','US/Pacific'),'P1D','','US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: date), vc0 (type: 
date) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_table_n1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT TO_DATE(date1), TO_DATE(datetime1) FROM druid_table_n1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_table_n1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1969-12-31 1969-12-31 +2004-04-04 2004-04-04 +2004-04-09 2004-04-09 +2004-04-11 2004-04-11 +2004-04-12 2004-04-12 -- 2.15.1 (Apple Git-101)