diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index b323cb5..b7a067a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -29,6 +29,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; @@ -675,6 +676,21 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, Object.. if (colList != null) { for (int i = 0; i < colList.size(); i++) { ExprNodeDesc newCol = foldExpr(colList.get(i), constants, cppCtx, op, 0, false); + if (!(colList.get(i) instanceof ExprNodeConstantDesc) && newCol instanceof ExprNodeConstantDesc) { + // Lets try to store original column name, if this column got folded + // This is useful for optimizations like GroupByOptimizer + String colName = colList.get(i).getExprString(); + if (HiveConf.getPositionFromInternalName(colName) == -1) { + // if its not an internal name, this is what we want. 
+ ((ExprNodeConstantDesc)newCol).setFoldedFromCol(colName); + } else { + // If it was an internal column, try to get the name from columnExprMap (which may be null, see the guard below) + ExprNodeDesc desc = (columnExprMap == null) ? null : columnExprMap.get(colName); + if (desc instanceof ExprNodeConstantDesc) { + ((ExprNodeConstantDesc)newCol).setFoldedFromCol(((ExprNodeConstantDesc)desc).getFoldedFromCol()); + } + } + } colList.set(i, newCol); if (columnExprMap != null) { columnExprMap.put(columnNames.get(i), newCol); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index d06522f..1d18e0c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -332,18 +332,26 @@ protected GroupByOptimizerSortMatch checkSortGroupBy(Stack stack, continue; } - ExprNodeDesc selectColList = selectDesc.getColList().get(pos); - if (selectColList instanceof ExprNodeColumnDesc) { + ExprNodeDesc selectCol = selectDesc.getColList().get(pos); + if (selectCol instanceof ExprNodeColumnDesc) { String newValue = - tableColsMapping.get(((ExprNodeColumnDesc) selectColList).getColumn()); + tableColsMapping.get(((ExprNodeColumnDesc) selectCol).getColumn()); tableColsMapping.put(outputColumnName, newValue); } else { tableColsMapping.remove(outputColumnName); - if ((selectColList instanceof ExprNodeConstantDesc) || - (selectColList instanceof ExprNodeNullDesc)) { + if (selectCol instanceof ExprNodeNullDesc) { newConstantCols.add(outputColumnName); } + if (selectCol instanceof ExprNodeConstantDesc) { + // Lets see if this constant was folded because of optimization. 
+ String origCol = ((ExprNodeConstantDesc) selectCol).getFoldedFromCol(); + if (origCol != null) { + tableColsMapping.put(outputColumnName, origCol); + } else { + newConstantCols.add(outputColumnName); + } + } } } @@ -351,7 +359,6 @@ protected GroupByOptimizerSortMatch checkSortGroupBy(Stack stack, } } - boolean sortGroupBy = true; // compute groupby columns from groupby keys List groupByCols = new ArrayList(); // If the group by expression is anything other than a list of columns, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 0a58200..bc4ad2f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -70,13 +70,14 @@ public void initialize(HiveConf hiveConf) { transformations.add(new ListBucketingPruner()); } } + + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + transformations.add(new ConstantPropagate()); + } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) || HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT)) { transformations.add(new GroupByOptimizer()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - transformations.add(new ConstantPropagate()); - } transformations.add(new ColumnPruner()); if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) { transformations.add(new SkewJoinOptimizer()); diff --git a/ql/src/test/results/compiler/plan/cast1.q.xml b/ql/src/test/results/compiler/plan/cast1.q.xml index e135567..1bba170 100644 --- a/ql/src/test/results/compiler/plan/cast1.q.xml +++ b/ql/src/test/results/compiler/plan/cast1.q.xml @@ -379,6 +379,9 @@ _col6 + + UDFToInteger(true) + @@ -390,6 +393,9 @@ _col5 + + UDFToBoolean(1) + @@ -401,6 +407,9 @@ _col4 + + (3 + UDFToInteger(2.0)) + @@ -412,6 +421,9 @@ _col3 + + (3.0 + 2.0) + 
@@ -423,6 +435,9 @@ _col2 + + (3 + 2.0) + @@ -434,6 +449,9 @@ _col1 + + (3.0 + 2) + @@ -445,6 +463,9 @@ _col0 + + (3 + 2) + diff --git a/ql/src/test/results/compiler/plan/udf1.q.xml b/ql/src/test/results/compiler/plan/udf1.q.xml index 44988ac..a6c45b2 100644 --- a/ql/src/test/results/compiler/plan/udf1.q.xml +++ b/ql/src/test/results/compiler/plan/udf1.q.xml @@ -535,6 +535,9 @@ _col8 + + ('' rlike '.*') + @@ -546,6 +549,9 @@ _col7 + + ('ab' like 'a') + @@ -557,6 +563,9 @@ _col6 + + ('ab' like '_a%') + @@ -568,6 +577,9 @@ _col5 + + ('ab' like '\%\_') + @@ -579,6 +591,9 @@ _col4 + + ('%_' like '\%\_') + @@ -590,6 +605,9 @@ _col3 + + ('ab' like '%a_') + @@ -601,6 +619,9 @@ _col2 + + ('ab' like '%a%') + @@ -612,6 +633,9 @@ _col1 + + ('b' like '%a%') + @@ -623,6 +647,9 @@ _col9 + + ('a' rlike '[ab]') + @@ -634,6 +661,9 @@ _col13 + + regexp_replace('abc', 'b', 'c') + @@ -645,6 +675,9 @@ _col12 + + ('hadoop' rlike 'o*') + @@ -656,6 +689,9 @@ _col11 + + ('hadoop' rlike '[a-z]*') + @@ -667,6 +703,9 @@ _col10 + + ('' rlike '[ab]') + @@ -678,6 +717,9 @@ _col16 + + regexp_replace('hadoop', '(.)[a-z]*', '$1ive') + @@ -689,6 +731,9 @@ _col15 + + regexp_replace('abbbb', 'bb', 'b') + @@ -700,6 +745,9 @@ _col14 + + regexp_replace('abc', 'z', 'a') + @@ -711,6 +759,9 @@ _col0 + + ('a' like '%a%') + diff --git a/ql/src/test/results/compiler/plan/udf4.q.xml b/ql/src/test/results/compiler/plan/udf4.q.xml index 4ca78bf..1edf38a 100644 --- a/ql/src/test/results/compiler/plan/udf4.q.xml +++ b/ql/src/test/results/compiler/plan/udf4.q.xml @@ -548,6 +548,9 @@ _col8 + + sqrt(0.0) + @@ -563,6 +566,9 @@ _col6 + + sqrt(1.0) + @@ -574,6 +580,9 @@ _col5 + + floor((- 1.5)) + @@ -585,6 +594,9 @@ _col4 + + floor(1.5) + @@ -596,6 +608,9 @@ _col3 + + floor(1.0) + @@ -607,6 +622,9 @@ _col2 + + round((- 1.5)) + @@ -618,6 +636,9 @@ _col1 + + round(1.5) + @@ -629,6 +650,9 @@ _col9 + + ceil(1.0) + @@ -672,6 +696,9 @@ _col12 + + ceil(1.0) + @@ -683,6 +710,9 @@ _col11 + + ceil((- 1.5)) + @@ -694,6 +724,9 @@ 
_col10 + + ceil(1.5) + @@ -705,6 +738,9 @@ _col17 + + (1 + (- 2)) + @@ -716,6 +752,9 @@ _col16 + + (1 + 2) + @@ -727,6 +766,9 @@ _col15 + + (- 3) + @@ -749,6 +791,9 @@ _col0 + + round(1.0) + @@ -760,6 +805,9 @@ _col18 + + (~ 1) + diff --git a/ql/src/test/results/compiler/plan/udf6.q.xml b/ql/src/test/results/compiler/plan/udf6.q.xml index 24008df..c4c8980 100644 --- a/ql/src/test/results/compiler/plan/udf6.q.xml +++ b/ql/src/test/results/compiler/plan/udf6.q.xml @@ -338,6 +338,9 @@ _col0 + + concat('a', 'b') + diff --git a/ql/src/test/results/compiler/plan/udf_case.q.xml b/ql/src/test/results/compiler/plan/udf_case.q.xml index dc620cb..3518469 100644 --- a/ql/src/test/results/compiler/plan/udf_case.q.xml +++ b/ql/src/test/results/compiler/plan/udf_case.q.xml @@ -351,6 +351,9 @@ _col0 + + CASE (1) WHEN (1) THEN (2) WHEN (3) THEN (4) ELSE (5) END + diff --git a/ql/src/test/results/compiler/plan/udf_when.q.xml b/ql/src/test/results/compiler/plan/udf_when.q.xml index dc620cb..4a1d604 100644 --- a/ql/src/test/results/compiler/plan/udf_when.q.xml +++ b/ql/src/test/results/compiler/plan/udf_when.q.xml @@ -351,6 +351,9 @@ _col0 + + CASE WHEN ((1 = 1)) THEN (2) WHEN ((3 = 5)) THEN (4) ELSE (5) END +