diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 3f95be2..f7ae147 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -100,6 +100,7 @@ import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.UDFConv; +import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime; import org.apache.hadoop.hive.ql.udf.UDFHex; import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract; import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace; @@ -759,6 +760,7 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { || udfClass.equals(UDFRegExpExtract.class) || udfClass.equals(UDFRegExpReplace.class) || udfClass.equals(UDFConv.class) + || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr)) || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) || udfClass.equals(UDFToString.class) && diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index bdc7448..8c1f34d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -77,6 +77,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; @@ -229,7 +231,7 @@ private static ExprNodeConstantDesc typeCast(ExprNodeDesc desc, TypeInfo ti, boo public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) { GenericUDF udf = funcDesc.getGenericUDF(); - if (!isDeterministicUdf(udf)) { + if (!isDeterministicUdf(udf, funcDesc.getChildren())) { return funcDesc; } return evaluateFunction(funcDesc.getGenericUDF(),funcDesc.getChildren(), funcDesc.getChildren()); @@ -347,7 +349,7 @@ private static ExprNodeDesc foldExprShortcut(ExprNodeDesc desc, Map children) { UDFType udfType = udf.getClass().getAnnotation(UDFType.class); if (udf instanceof GenericUDFBridge) { udfType = ((GenericUDFBridge) udf).getUdfClass().getAnnotation(UDFType.class); } if (udfType.deterministic() == false) { + if (udf.getClass().equals(GenericUDFUnixTimeStamp.class) + && children != null && children.size() > 0) { + // unix_timestamp is polymorphic (ignore class annotations) + return true; + } return false; } @@ -817,6 +824,13 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List } } + if (udf instanceof GenericUDFUnixTimeStamp) { + if (newExprs.size() >= 1) { + // unix_timestamp(args) -> to_unix_timestamp(args) + return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs); + } + } + return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f674ece..e76fa6b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -126,6 +126,7 @@ import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; import org.apache.hadoop.hive.ql.udf.UDFDegrees; import org.apache.hadoop.hive.ql.udf.UDFExp; +import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime; import org.apache.hadoop.hive.ql.udf.UDFHex; import org.apache.hadoop.hive.ql.udf.UDFHour; import org.apache.hadoop.hive.ql.udf.UDFLength; @@ -247,6 +248,7 @@ public Vectorizer() { supportedGenericUDFs.add(UDFSecond.class); supportedGenericUDFs.add(UDFWeekOfYear.class); supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); + supportedGenericUDFs.add(UDFFromUnixTime.class); supportedGenericUDFs.add(GenericUDFDateAdd.class); supportedGenericUDFs.add(GenericUDFDateSub.class); diff --git ql/src/test/queries/clientpositive/foldts.q ql/src/test/queries/clientpositive/foldts.q new file mode 100644 index 0000000..362cac2 --- /dev/null +++ ql/src/test/queries/clientpositive/foldts.q @@ -0,0 +1,20 @@ + +set hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +explain +select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1; + +select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1; + +create temporary table src1orc stored as orc as select * from src1; + +explain +select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; + +select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; + +explain +select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; + +select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; diff --git ql/src/test/results/clientpositive/foldts.q.out ql/src/test/results/clientpositive/foldts.q.out new file mode 100644 index 0000000..24785c6 --- /dev/null +++ ql/src/test/results/clientpositive/foldts.q.out @@ -0,0 +1,154 @@ +PREHOOK: query: explain +select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +1969-12-31 15:59:46.674 -14 -14 +PREHOOK: query: create temporary table src1orc stored as orc as select * from src1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src1 +PREHOOK: Output: database:default +PREHOOK: Output: default@src1orc +POSTHOOK: query: create temporary table src1orc stored as orc as select * from src1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src1orc +PREHOOK: query: explain +select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +Wednesday +PREHOOK: query: explain +select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +Wednesday