From dd4a62c48a043cecd872bc26a08e22dbced998ec Mon Sep 17 00:00:00 2001
From: Gopal V
Date: Tue, 22 Mar 2016 22:44:07 -0700
Subject: [PATCH] Transform unix_timestamp(args) into to_unix_timestamp(args)

---
 .../ql/optimizer/ConstantPropagateProcFactory.java |  22 ++++-
 ql/src/test/queries/clientpositive/foldts.q        |  15 +++
 ql/src/test/results/clientpositive/foldts.q.out    | 105 +++++++++++++++++++++
 3 files changed, 138 insertions(+), 4 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/foldts.q
 create mode 100644 ql/src/test/results/clientpositive/foldts.q.out

diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index bdc7448..8c1f34d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -77,6 +77,8 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
@@ -229,7 +231,7 @@ private static ExprNodeConstantDesc typeCast(ExprNodeDesc desc, TypeInfo ti, boo
   public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) {
     GenericUDF udf = funcDesc.getGenericUDF();
-    if (!isDeterministicUdf(udf)) {
+    if (!isDeterministicUdf(udf, funcDesc.getChildren())) {
       return funcDesc;
     }
     return evaluateFunction(funcDesc.getGenericUDF(),funcDesc.getChildren(), funcDesc.getChildren());
@@ -347,7 +349,7 @@ private static ExprNodeDesc foldExprShortcut(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants,
-  private static boolean isDeterministicUdf(GenericUDF udf) {
+  private static boolean isDeterministicUdf(GenericUDF udf, List<ExprNodeDesc> children) {
     UDFType udfType = udf.getClass().getAnnotation(UDFType.class);
     if (udf instanceof GenericUDFBridge) {
       udfType = ((GenericUDFBridge) udf).getUdfClass().getAnnotation(UDFType.class);
     }
     if (udfType.deterministic() == false) {
+      if (udf.getClass().equals(GenericUDFUnixTimeStamp.class)
+          && children != null && children.size() > 0) {
+        // unix_timestamp is polymorphic (ignore class annotations)
+        return true;
+      }
       return false;
     }
@@ -817,6 +824,13 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs,
       }
     }
 
+    if (udf instanceof GenericUDFUnixTimeStamp) {
+      if (newExprs.size() >= 1) {
+        // unix_timestamp(args) -> to_unix_timestamp(args)
+        return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
+      }
+    }
+
     return null;
   }
diff --git ql/src/test/queries/clientpositive/foldts.q ql/src/test/queries/clientpositive/foldts.q
new file mode 100644
index 0000000..727d794
--- /dev/null
+++ ql/src/test/queries/clientpositive/foldts.q
@@ -0,0 +1,15 @@
+
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1;
+
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1;
+
+create temporary table src1orc stored as orc as select * from src1;
+
+explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1;
+
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1;
diff --git ql/src/test/results/clientpositive/foldts.q.out ql/src/test/results/clientpositive/foldts.q.out
new file mode 100644
index 0000000..a730973
--- /dev/null
+++ ql/src/test/results/clientpositive/foldts.q.out
@@ -0,0 +1,105 @@
+PREHOOK: query: explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:46.674	-14	-14
+PREHOOK: query: create temporary table src1orc stored as orc as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src1orc
+POSTHOOK: query: create temporary table src1orc stored as orc as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src1orc
+PREHOOK: query: explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+Wednesday
-- 
2.4.0
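
The rationale for the rewrite: unix_timestamp() with no arguments returns the current time, so it is genuinely non-deterministic and must never be constant-folded, while unix_timestamp(expr) is a pure function of its argument and is equivalent to to_unix_timestamp(expr), which constant propagation and the vectorizer already handle. Below is a minimal standalone sketch of that decision rule; it uses plain stand-in types rather than Hive's GenericUDF/ExprNodeDesc classes, and the class and method names are illustrative only.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Standalone sketch, not Hive code: a nominally non-deterministic UDF is
// treated as deterministic when it has arguments, and unix_timestamp(args)
// is rewritten to to_unix_timestamp(args).
public class UnixTimestampFoldSketch {

  // Mirrors the intent of isDeterministicUdf(udf, children).
  static boolean isDeterministic(String udfName, List<String> children) {
    if ("unix_timestamp".equals(udfName)) {
      // Zero-arg form reads the clock; the arg-taking form is pure in its inputs.
      return children != null && !children.isEmpty();
    }
    return true; // other UDFs: decided by their @UDFType annotation in Hive
  }

  // Mirrors the intent of the shortcutFunction change.
  static String rewrite(String udfName, List<String> children) {
    if ("unix_timestamp".equals(udfName) && !children.isEmpty()) {
      return "to_unix_timestamp(" + String.join(", ", children) + ")";
    }
    return udfName + "(" + String.join(", ", children) + ")";
  }

  public static void main(String[] args) {
    List<String> none = Collections.emptyList();
    List<String> col = Arrays.asList("ctimestamp1");

    System.out.println(isDeterministic("unix_timestamp", none)); // false: never fold
    System.out.println(isDeterministic("unix_timestamp", col));  // true: safe to fold
    System.out.println(rewrite("unix_timestamp", col));          // to_unix_timestamp(ctimestamp1)
  }
}

The foldts.q.out plans above show the effect: both unix_timestamp(ctimestamp1) and to_unix_timestamp(ctimestamp1) compile to the same to_unix_timestamp call, and the first query's map task still reports Execution mode: vectorized.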