diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 3f2aaaa..4a9bb29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -165,6 +165,7 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -2480,6 +2481,32 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } /** + * Figure out if this parameter in the aggregation tree is a constant + */ + private ExprNodeDesc getConstantParameter(List reduceValues, String internalName, + boolean isAllColumns) { + + String[] terms = internalName.split("\\."); + if (terms.length != 2 || reduceValues == null) { + return null; + } + + if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) { + int pos = getPositionFromInternalName(terms[1]); + if (pos >= 0 && pos < reduceValues.size()) { + ExprNodeDesc reduceValue = reduceValues.get(pos); + if (reduceValue != null) { + if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) { + return reduceValue; + } + } + } + } + + return null; + } + + /** * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)). * The new GroupByOperator will be a child of the reduceSinkOperatorInfo. 
* @@ -2528,12 +2555,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // get the last colName for the reduce KEY // it represents the column name corresponding to distinct aggr, if any String lastKeyColName = null; + List reduceValues = null; if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) { List inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames(); if (inputKeyCols.size() > 0) { lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1); } + reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols(); } int numDistinctUDFs = 0; for (Map.Entry entry : aggregationTrees.entrySet()) { @@ -2565,9 +2594,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { getColumnInternalName(i-1); } - aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), + + ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), - paraExprInfo.getIsVirtualCol())); + paraExprInfo.getIsVirtualCol()); + ExprNodeDesc reduceValue = getConstantParameter(reduceValues, + paraExprInfo.getInternalName(), isAllColumns); + + if (reduceValue != null) { + // this parameter is a constant + expr = reduceValue; + } + + aggParameters.add(expr); } if (isDistinct) { @@ -2653,12 +2692,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // get the last colName for the reduce KEY // it represents the column name corresponding to distinct aggr, if any String lastKeyColName = null; + List reduceValues = null; if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) { List inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames(); if (inputKeyCols.size() > 0) { lastKeyColName = inputKeyCols.get(inputKeyCols.size()-1); } + reduceValues = ((ReduceSinkDesc)reduceSinkOperatorInfo.getConf()).getValueCols(); } int numDistinctUDFs = 0; for (Map.Entry entry : aggregationTrees.entrySet()) { @@ -2666,6 +2707,7 
@@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { String aggName = value.getChild(0).getText(); ArrayList aggParameters = new ArrayList(); boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI); + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; // If the function is distinct, partial aggregartion has not been done on // the client side. @@ -2699,9 +2741,20 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { + getColumnInternalName(i-1); } - aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), + + ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), - paraExprInfo.getIsVirtualCol())); + paraExprInfo.getIsVirtualCol()); + ExprNodeDesc reduceValue = getConstantParameter(reduceValues, + paraExprInfo.getInternalName(), isAllColumns); + + if (reduceValue != null) { + // this parameter is a constant + expr = reduceValue; + } + + aggParameters.add(expr); + } } else { ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value); @@ -2717,7 +2770,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (isDistinct) { numDistinctUDFs++; } - boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); GenericUDAFEvaluator genericUDAFEvaluator = null; // For distincts, partial aggregations have not been done diff --git ql/src/test/queries/clientpositive/udaf_percentile_approx.q ql/src/test/queries/clientpositive/udaf_percentile_approx.q index c436a63..9ab09de 100644 --- ql/src/test/queries/clientpositive/udaf_percentile_approx.q +++ ql/src/test/queries/clientpositive/udaf_percentile_approx.q @@ -1,6 +1,26 @@ set mapred.reduce.tasks=4; set hive.exec.reducers.max=4; +set hive.map.aggr=false; +-- disable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS double), 
0.5, 100) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src; + +SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src; + +SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src; + +SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src; +SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src; + +set hive.map.aggr=true; +-- enable map-side aggregation SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src; SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src; SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src; diff --git ql/src/test/results/clientpositive/count.q.out ql/src/test/results/clientpositive/count.q.out index 61e54c7..d9c5667 100644 --- ql/src/test/results/clientpositive/count.q.out +++ ql/src/test/results/clientpositive/count.q.out @@ -486,7 +486,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) expr: count() expr: count(KEY._col0:14._col0) expr: count(KEY._col0:14._col1) diff --git ql/src/test/results/clientpositive/nullgroup.q.out ql/src/test/results/clientpositive/nullgroup.q.out index 434fa6c..2d8d059 100644 --- ql/src/test/results/clientpositive/nullgroup.q.out +++ ql/src/test/results/clientpositive/nullgroup.q.out @@ -176,7 +176,7 @@ STAGE PLANS: Reduce 
Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) bucketGroup: false mode: partial1 outputColumnNames: _col0 @@ -264,7 +264,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) bucketGroup: false mode: complete outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/nullgroup2.q.out ql/src/test/results/clientpositive/nullgroup2.q.out index aa52d62..9d1d6e9 100644 --- ql/src/test/results/clientpositive/nullgroup2.q.out +++ ql/src/test/results/clientpositive/nullgroup2.q.out @@ -251,7 +251,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) bucketGroup: false keys: expr: KEY._col0 @@ -362,7 +362,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) bucketGroup: false keys: expr: KEY._col0 diff --git ql/src/test/results/clientpositive/nullgroup4.q.out ql/src/test/results/clientpositive/nullgroup4.q.out index 3dd3c66..2bcc5ec 100644 --- ql/src/test/results/clientpositive/nullgroup4.q.out +++ ql/src/test/results/clientpositive/nullgroup4.q.out @@ -246,7 +246,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) expr: count(DISTINCT KEY._col0:0._col0) bucketGroup: false mode: partial1 @@ -347,7 +347,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: - expr: count(VALUE._col0) + expr: count(1) expr: count(DISTINCT KEY._col0:0._col0) bucketGroup: false mode: complete diff --git ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out index 9ac4bbc..6e47961 100644 --- ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out +++ ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out @@ -137,7 +137,7 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator 
aggregations: - expr: count(VALUE._col0) + expr: count(1) expr: count(DISTINCT KEY._col0:0._col0) expr: count(DISTINCT KEY._col0:1._col0) bucketGroup: false diff --git ql/src/test/results/clientpositive/udaf_percentile_approx.q.out ql/src/test/results/clientpositive/udaf_percentile_approx.q.out index 4f85a82..d871179 100644 --- ql/src/test/results/clientpositive/udaf_percentile_approx.q.out +++ ql/src/test/results/clientpositive/udaf_percentile_approx.q.out @@ -1,8 +1,120 @@ -PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +PREHOOK: query: -- disable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +POSTHOOK: query: -- disable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +255.5 +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 100) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +252.77777777777777 +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5, 1000) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +255.5 +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src +PREHOOK: type: QUERY 
+PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +255.5 +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 100) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +252.77777777777777 +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), 0.5, 1000) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +255.5 +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98)) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 100) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[24.07,252.77777777777777,476.9444444444444,487.82] +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS 
double), array(0.05,0.5,0.95,0.98), 1000) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS double), array(0.05,0.5,0.95,0.98), 1000) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98)) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 100) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[24.07,252.77777777777777,476.9444444444444,487.82] +PREHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT percentile_approx(cast(substr(src.value,5) AS int), array(0.05,0.5,0.95,0.98), 1000) FROM src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[26.0,255.5,479.0,491.0] +PREHOOK: query: -- enable map-side aggregation +SELECT percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- enable map-side aggregation +SELECT 
percentile_approx(cast(substr(src.value,5) AS double), 0.5) FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here ####