From 3fc94c1d77e006d2a3297709e48019c89730c504 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 10 Mar 2016 10:39:19 -0800 Subject: [PATCH] HIVE-13250 : Compute predicate conversions on the client, instead of per row group --- .../calcite/translator/RexNodeConverter.java | 14 ++- .../test/queries/clientpositive/cast_on_constant.q | 4 + .../results/clientpositive/cast_on_constant.q.out | 114 +++++++++++++++++++++ 3 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/cast_on_constant.q create mode 100644 ql/src/test/results/clientpositive/cast_on_constant.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index ee4f4ea..e3dbcb7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -69,6 +69,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; @@ -181,8 +182,17 @@ private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticExcep assert func.getChildren().size() == 2; // TODO: checking 2 children is useless, compare already does that. } else if (isCompare && (func.getChildren().size() == 2)) { - tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0) - .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); + ExprNodeDesc child0 = func.getChildren().get(0); + ExprNodeDesc child1 = func.getChildren().get(1); + if (GenericUDFOPEqual.class.getName().equals(func.getGenericUDF().getUdfName()) && child0 instanceof ExprNodeConstantDesc && !(child1 instanceof ExprNodeConstantDesc)) { + func.getChildren().set(0,ParseUtils.createConversionCast(child0, (PrimitiveTypeInfo)child1.getTypeInfo())); + tgtDT = null; + } else if (GenericUDFOPEqual.class.getName().equals(func.getGenericUDF().getUdfName()) && (child1 instanceof ExprNodeConstantDesc && !(child0 instanceof ExprNodeConstantDesc))) { + func.getChildren().set(1,ParseUtils.createConversionCast(child1, (PrimitiveTypeInfo)child0.getTypeInfo())); + tgtDT = null; + } else { + tgtDT = FunctionRegistry.getCommonClassForComparison(child0.getTypeInfo(), child1.getTypeInfo()); + } } else if (isWhenCase) { // If it is a CASE or WHEN, we need to check that children do not contain stateful functions // as they are not allowed diff --git a/ql/src/test/queries/clientpositive/cast_on_constant.q b/ql/src/test/queries/clientpositive/cast_on_constant.q new file mode 100644 index 0000000..81ac369 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cast_on_constant.q @@ -0,0 +1,4 @@ +create table t1(ts_field timestamp); +explain select count(*) from t1 where ts_field = "2016-01-23 00:00:00"; +explain select count(*) from t1 where ts_field = 1453507200; +drop table t1; diff --git a/ql/src/test/results/clientpositive/cast_on_constant.q.out b/ql/src/test/results/clientpositive/cast_on_constant.q.out new file mode 100644 index 0000000..8ac2597 --- /dev/null +++ b/ql/src/test/results/clientpositive/cast_on_constant.q.out @@ -0,0 +1,114 @@ +PREHOOK: query: create table t1(ts_field timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1(ts_field timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: explain select count(*) from t1 where ts_field = "2016-01-23 00:00:00" +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from t1 where ts_field = "2016-01-23 00:00:00" +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select count(*) from t1 where ts_field = 1453507200 +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) from t1 where ts_field = 1453507200 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (ts_field = 1970-01-17 11:45:07.2) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: drop table t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 -- 1.7.12.4 (Apple Git-37)