From caa30317d53d979a4c518e9dd3461cbef6f2ac13 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Fri, 8 May 2015 18:06:27 -0700 Subject: [PATCH] HIVE-10636 : CASE comparison operator rotation optimization --- .../ql/optimizer/ConstantPropagateProcFactory.java | 48 ++++++++++++++++++++++ .../clientpositive/fold_eq_with_case_when.q | 1 + .../clientpositive/fold_eq_with_case_when.q.out | 36 ++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/fold_eq_with_case_when.q create mode 100644 ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 3486cee..5e75d4f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -90,6 +90,7 @@ import org.apache.hadoop.io.NullWritable; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; /** * Factory for generating the different node processors used by ConstantPropagate. @@ -364,6 +365,53 @@ private static ExprNodeColumnDesc getColumnExpr(ExprNodeDesc expr) { } private static ExprNodeDesc shortcutFunction(GenericUDF udf, List newExprs) throws UDFArgumentException { + + if (udf instanceof GenericUDFOPEqual) { + assert newExprs.size() == 2; + boolean foundUDFInFirst = false; + ExprNodeGenericFuncDesc caseOrWhenexpr = null; + if (newExprs.get(0) instanceof ExprNodeGenericFuncDesc) { + caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(0); + if (caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase) { + foundUDFInFirst = true; + } + } + if (!foundUDFInFirst && newExprs.get(1) instanceof ExprNodeGenericFuncDesc) { + caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(1); + if (!(caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase)) { + return null; + } + } + GenericUDF childUDF = caseOrWhenexpr.getGenericUDF(); + List children = caseOrWhenexpr.getChildren(); + int i; + if (childUDF instanceof GenericUDFWhen) { + for (i = 1; i < children.size(); i+=2) { + children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), + Lists.newArrayList(children.get(i),newExprs.get(foundUDFInFirst ? 1 : 0)))); + } + if(children.size() % 2 == 1) { + i = children.size()-1; + children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), + Lists.newArrayList(children.get(i),newExprs.get(foundUDFInFirst ? 1 : 0)))); + } + return caseOrWhenexpr; + } else if (childUDF instanceof GenericUDFCase) { + for (i = 2; i < children.size(); i+=2) { + children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), + Lists.newArrayList(children.get(i),newExprs.get(foundUDFInFirst ? 1 : 0)))); + } + if(children.size() % 2 == 0) { + i = children.size()-1; + children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), + Lists.newArrayList(children.get(i),newExprs.get(foundUDFInFirst ? 1 : 0)))); + } + return caseOrWhenexpr; + } else { + // cant happen + return null; + } + } if (udf instanceof GenericUDFOPAnd) { for (int i = 0; i < 2; i++) { ExprNodeDesc childExpr = newExprs.get(i); diff --git a/ql/src/test/queries/clientpositive/fold_eq_with_case_when.q b/ql/src/test/queries/clientpositive/fold_eq_with_case_when.q new file mode 100644 index 0000000..5f9e011 --- /dev/null +++ b/ql/src/test/queries/clientpositive/fold_eq_with_case_when.q @@ -0,0 +1 @@ +explain select key from src where (case key when '238' then 1 else 2 end) = 1; diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out new file mode 100644 index 0000000..3d0095b --- /dev/null +++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -0,0 +1,36 @@ +PREHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key from src where (case key when '238' then 1 else 2 end) = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key = '238') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + -- 1.7.12.4 (Apple Git-37)