diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f2a6ade..b2e1c88 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -53,7 +53,6 @@
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
 import org.apache.calcite.rel.RelNode;
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -3062,8 +3061,6 @@ private Operator genHavingPlan(String dest, QB qb, Operator input,
 
   protected static ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode,
       final boolean noneSet) throws SemanticException {
-    final MutableBoolean visited = new MutableBoolean(false);
-    final MutableBoolean found = new MutableBoolean(false);
 
     TreeVisitorAction action = new TreeVisitorAction() {
 
@@ -3075,45 +3072,62 @@ public Object pre(Object t) {
       @Override
       public Object post(Object t) {
         ASTNode root = (ASTNode) t;
-        if (root.getType() == HiveParser.TOK_FUNCTION && root.getChildCount() == 2) {
+        if (root.getType() == HiveParser.TOK_FUNCTION) {
           ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
-          if (func.getText().equals("grouping")) {
-            ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, 1);
-            visited.setValue(true);
-            for (int i = 0; i < grpByAstExprs.size(); i++) {
-              ASTNode grpByExpr = grpByAstExprs.get(i);
-              if (grpByExpr.toStringTree().equals(c.toStringTree())) {
-                ASTNode child1;
-                if (noneSet) {
-                  // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
-                  // grouping should return 0
-                  child1 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
-                    String.valueOf(0));
-                } else {
-                  // We refer to grouping_id column
-                  child1 = (ASTNode) ParseDriver.adaptor.create(
-                    HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
-                  ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create(
-                    HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+          if (func.getText().equals("grouping") && func.getChildCount() == 0) {
+            int numberOperands = ParseDriver.adaptor.getChildCount(root);
+            // We implement this logic using replaceChildren instead of replacing
+            // the root node itself because windowing logic stores multiple
+            // pointers to the AST, and replacing root might lead to some pointers
+            // leading to non-rewritten version
+            ASTNode newRoot = new ASTNode();
+            // Rewritten grouping function
+            ASTNode groupingFunc = (ASTNode) ParseDriver.adaptor.create(
+                HiveParser.Identifier, "grouping");
+            ParseDriver.adaptor.addChild(groupingFunc, ParseDriver.adaptor.create(
+                HiveParser.Identifier, "rewritten"));
+            newRoot.addChild(groupingFunc);
+            // Grouping ID reference
+            ASTNode childGroupingID;
+            if (noneSet) {
+              // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
+              // grouping should return 0
+              childGroupingID = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+                String.valueOf(0));
+            } else {
+              // We refer to grouping_id column
+              childGroupingID = (ASTNode) ParseDriver.adaptor.create(
+                  HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+              ParseDriver.adaptor.addChild(childGroupingID, ParseDriver.adaptor.create(
+                  HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+            }
+            newRoot.addChild(childGroupingID);
+            // Indices
+            for (int i = 1; i < numberOperands; i++) {
+              ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, i);
+              for (int j = 0; j < grpByAstExprs.size(); j++) {
+                ASTNode grpByExpr = grpByAstExprs.get(j);
+                if (grpByExpr.toStringTree().equals(c.toStringTree())) {
+                  // Create and add AST node with position of grouping function input
+                  // in group by clause
+                  ASTNode childN = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+                    String.valueOf(IntMath.mod(-j-1, grpByAstExprs.size())));
+                  newRoot.addChild(childN);
+                  break;
                 }
-                ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
-                  String.valueOf(IntMath.mod(-i-1, grpByAstExprs.size())));
-                root.setChild(1, child1);
-                root.addChild(child2);
-                found.setValue(true);
-                break;
               }
             }
+            if (numberOperands + 1 != ParseDriver.adaptor.getChildCount(newRoot)) {
+              throw new RuntimeException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
+            }
+            // Replace expression
+            root.replaceChildren(0, numberOperands - 1, newRoot);
           }
         }
         return t;
       }
     };
-    ASTNode newTargetNode = (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
-    if (visited.booleanValue() && !found.booleanValue()) {
-      throw new SemanticException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
-    }
-    return newTargetNode;
+    return (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
   }
 
   private Operator genPlanForSubQueryPredicate(
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
index cc01526..cee0e14 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
@@ -23,7 +23,6 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -32,27 +31,29 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
+import org.apache.hadoop.io.IntWritable;
+
+import com.google.common.math.IntMath;
 
 /**
  * UDF grouping
  */
 @Description(name = "grouping",
-value = "_FUNC_(a, b) - Indicates whether a specified column expression in "
+value = "_FUNC_(a, p1, ..., pn) - Indicates whether a specified column expression in "
 + "is aggregated or not. Returns 1 for aggregated or 0 for not aggregated. ",
-extended = "a is the grouping id, b is the index we want to extract")
+extended = "a is the grouping id, p1...pn are the indices we want to extract")
 @UDFType(deterministic = true)
-@NDV(maxNdv = 2)
 public class GenericUDFGrouping extends GenericUDF {
 
   private transient IntObjectInspector groupingIdOI;
-  private int index = 0;
-  private ByteWritable byteWritable = new ByteWritable();
+  private int[] indices;
+  private IntWritable intWritable = new IntWritable();
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    if (arguments.length != 2) {
+    if (arguments.length < 2) {
       throw new UDFArgumentLengthException(
-        "grouping() requires 2 argument, got " + arguments.length);
+        "grouping() requires at least 2 arguments, got " + arguments.length);
     }
 
     if (arguments[0].getCategory() != Category.PRIMITIVE) {
@@ -64,27 +65,37 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
     }
     groupingIdOI = (IntObjectInspector) arguments[0];
 
-    PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[1];
-    if (!(arg2OI instanceof WritableConstantIntObjectInspector)) {
-      throw new UDFArgumentTypeException(1, "The second argument to grouping() must be a constant");
+    indices = new int[arguments.length - 1];
+    for (int i = 1; i < arguments.length; i++) {
+      PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[i];
+      if (!(arg2OI instanceof WritableConstantIntObjectInspector)) {
+        throw new UDFArgumentTypeException(i, "Must be a constant");
+      }
+      indices[i - 1] = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get();
     }
-    index = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get();
 
-    return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
+    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
   }
 
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
     // groupingId = PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI);
     // Check that the bit at the given index is '1' or '0'
-    byteWritable.set((byte)
-      ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> index) & 1));
-    return byteWritable;
+    int result = 0;
+    // grouping(c1, c2, c3)
+    // is equivalent to
+    // 4 * grouping(c1) + 2 * grouping(c2) + grouping(c3)
+    for (int a = 1; a < arguments.length; a++) {
+      result += IntMath.pow(2, indices.length - a) *
+        ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> indices[a - 1]) & 1);
+    }
+    intWritable.set(result);
+    return intWritable;
   }
 
   @Override
   public String getDisplayString(String[] children) {
-    assert (children.length == 2);
+    assert (children.length > 1);
     return getStandardDisplayString("grouping", children);
   }
diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
index 34759ca..7157106 100644
--- ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
+++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -119,3 +119,39 @@ select key, value
 from T1
 group by key, value
 having grouping(key) = 0;
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
index b82d9c2..473d17a 100644
--- ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
+++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -138,7 +138,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -227,7 +227,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean)
+            predicate: (grouping(_col2, 1) = 1) (type: boolean)
             Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: _col0 (type: int), _col1 (type: int)
@@ -314,10 +314,10 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean)
+            predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean)
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+              expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
               outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
@@ -332,13 +332,13 @@
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
+              key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
               sort order: -+
               Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: int), _col1 (type: int)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
@@ -423,7 +423,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -505,7 +505,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -681,7 +681,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -696,13 +696,13 @@
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
+              key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
               sort order: -+
               Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
               value expressions: _col0 (type: int), _col1 (type: int)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
@@ -787,7 +787,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), 0 (type: tinyint), 0 (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), 0 (type: int), 0 (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -864,7 +864,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: int), _col1 (type: int), 0 (type: tinyint)
+            expressions: _col0 (type: int), _col1 (type: int), 0 (type: int)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
@@ -976,3 +976,341 @@
 POSTHOOK: Input: default@t1
 3	3	3
 NULL	4	5
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	1	0	0
+1	NULL	0	0
+1	NULL	1	1
+2	2	0	0
+2	NULL	1	1
+3	3	0	0
+3	NULL	0	0
+3	NULL	1	1
+4	5	0	0
+4	NULL	1	1
+NULL	1	2	2
+NULL	2	2	2
+NULL	3	2	2
+NULL	5	2	2
+NULL	NULL	2	2
+NULL	NULL	3	3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	1	0	0
+1	NULL	0	0
+1	NULL	1	2
+2	2	0	0
+2	NULL	1	2
+3	3	0	0
+3	NULL	0	0
+3	NULL	1	2
+4	5	0	0
+4	NULL	1	2
+NULL	1	2	1
+NULL	2	2	1
+NULL	3	2	1
+NULL	5	2	1
+NULL	NULL	2	1
+NULL	NULL	3	3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	1	0	0
+1	NULL	0	0
+1	NULL	1	1
+2	2	0	0
+2	NULL	1	1
+3	3	0	0
+3	NULL	0	0
+3	NULL	1	1
+4	5	0	0
+4	NULL	1	1
+NULL	NULL	3	3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int), value (type: int), 0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	1	0	0
+1	NULL	0	0
+1	NULL	1	2
+2	2	0	0
+2	NULL	1	2
+3	3	0	0
+3	NULL	0	0
+3	NULL	1	2
+4	5	0	0
+4	NULL	1	2
+NULL	NULL	3	3
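
Reviewer note (not part of the patch): the arithmetic that the new evaluate() in GenericUDFGrouping implements can be checked in isolation. The sketch below is a minimal standalone rendering of that bit logic, stripped of Hive's ObjectInspector machinery; the class and method names here are illustrative only. It assumes, as the patch does, that indices[k] holds the bit position of the (k+1)-th grouping() operand inside grouping__id, and that the first operand contributes the most significant bit of the result.

public final class GroupingSketch {

  // grouping(gid, i1, ..., in) = sum over k of 2^(n-1-k) * bit(gid, i_k),
  // mirroring the loop in GenericUDFGrouping.evaluate(), which computes the
  // same weights via IntMath.pow(2, indices.length - a).
  static int grouping(int groupingId, int... indices) {
    int result = 0;
    for (int k = 0; k < indices.length; k++) {
      result += (1 << (indices.length - 1 - k)) * ((groupingId >> indices[k]) & 1);
    }
    return result;
  }

  public static void main(String[] args) {
    // With GROUP BY CUBE(key, value), key maps to bit 1 and value to bit 0.
    System.out.println(grouping(3, 1, 0)); // 3 -> row "NULL NULL 3 3"
    System.out.println(grouping(1, 1, 0)); // 1 -> row "1 NULL 1 1"
    System.out.println(grouping(1, 0, 1)); // 2 -> row "1 NULL 1 2" for grouping(value, key)
  }
}

The printed values match the last column of the cube results in the new q.out section above, which is a quick way to sanity-check the operand weights.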
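Reviewer note (not part of the patch): the constant that rewriteGroupingFunctionAST() substitutes for each grouping() operand is IntMath.mod(-j-1, n), where j is the operand's 0-based position in the GROUP BY clause and n is the number of group-by keys. Since IntMath.mod(-j-1, n) == n-1-j, this is exactly the operand's bit position in grouping__id: the first key occupies the most significant bit. A minimal sketch of the mapping (class name illustrative):

import com.google.common.math.IntMath;

public final class GroupingIndexSketch {
  public static void main(String[] args) {
    int n = 2; // e.g. GROUP BY CUBE(key, value)
    for (int j = 0; j < n; j++) {
      // position 0 (key) -> bit 1, position 1 (value) -> bit 0
      System.out.printf("group-by position %d -> grouping__id bit %d%n",
          j, IntMath.mod(-j - 1, n));
    }
  }
}

This is also why grouping(key, value) over CUBE(key, value) is compiled to grouping(_col2, 1, 0) in the new explain output, while grouping(value, key) is compiled to grouping(_col2, 0, 1).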