diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 3262887..b75d87f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -53,7 +53,6 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.ObjectPair; -import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -189,6 +188,7 @@ import org.apache.hadoop.hive.ql.stats.StatsFactory; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFGroupingID; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; @@ -4569,10 +4569,12 @@ private void createNewGroupingKey(List groupByKeys, List outputColumnNames, RowResolver groupByOutputRowResolver, Map colExprMap) { - // The value for the constant does not matter. 
It is replaced by the grouping set - // value for the actual implementation - ExprNodeConstantDesc constant = new ExprNodeConstantDesc("0"); - groupByKeys.add(constant); + // Placeholder UDF: it is replaced by the grouping set value for the actual implementation + GenericUDFGroupingID genericUDFGroupingID = new GenericUDFGroupingID(); + ExprNodeGenericFuncDesc placeholder = new ExprNodeGenericFuncDesc( + TypeInfoFactory.stringTypeInfo, genericUDFGroupingID, + new ArrayList(groupByKeys)); + groupByKeys.add(placeholder); String field = getColumnInternalName(groupByKeys.size() - 1); outputColumnNames.add(field); groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), @@ -4581,7 +4583,7 @@ private void createNewGroupingKey(List groupByKeys, TypeInfoFactory.stringTypeInfo, null, true)); - colExprMap.put(field, constant); + colExprMap.put(field, placeholder); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGroupingID.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGroupingID.java new file mode 100644 index 0000000..a14c12b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGroupingID.java @@ -0,0 +1,34 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * This function is a placeholder for GROUPING__ID, i.e. it never gets evaluated. + * In fact, it is replaced by the grouping set value for the actual implementation. 
+ */ +@Description(name = "grouping__id") +public class GenericUDFGroupingID extends GenericUDF { + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + PrimitiveCategory.STRING); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + // This function is never going to be evaluated. + // It is a placeholder. + return null; + } + + @Override + public String getDisplayString(String[] children) { + return "grouping__id"; + } +} diff --git ql/src/test/queries/clientpositive/groupby_grouping_id3.q ql/src/test/queries/clientpositive/groupby_grouping_id3.q new file mode 100644 index 0000000..c6746a8 --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_id3.q @@ -0,0 +1,22 @@ +CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; + +set hive.cbo.enable = false; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + +set hive.cbo.enable = true; + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + diff --git ql/src/test/results/clientpositive/groupby_grouping_id3.q.out ql/src/test/results/clientpositive/groupby_grouping_id3.q.out new file mode 100644 index 0000000..c305bfd --- /dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_id3.q.out @@ -0,0 +1,60 @@ +PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: -- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: -- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1