diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 10f16ca..a8c6615 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -3323,10 +3323,11 @@ private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); for (WindowExpressionSpec wExprSpec : windowExpressions) { - if (cubeRollupGrpSetPresent) { + if (!qbp.getDestToGroupBy().isEmpty()) { // Special handling of grouping function wExprSpec.setExpression(rewriteGroupingFunctionAST( - getGroupByForClause(qbp, selClauseName), wExprSpec.getExpression())); + getGroupByForClause(qbp, selClauseName), wExprSpec.getExpression(), + !cubeRollupGrpSetPresent)); } if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) { Pair wtp = genWindowingProj(qb, wExprSpec, srcRel); @@ -3614,9 +3615,10 @@ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel) TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); // We allow stateful functions in the SELECT list (but nowhere else) tcCtx.setAllowStatefulFunctions(true); - if (cubeRollupGrpSetPresent) { + if (!qbp.getDestToGroupBy().isEmpty()) { // Special handling of grouping function - expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr); + expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr, + !cubeRollupGrpSetPresent); } ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); String recommended = recommendName(exp, colAlias); @@ -3997,11 +3999,12 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map grpByAstExprs, ASTNode targetNode) throws SemanticException { + protected static ASTNode rewriteGroupingFunctionAST(final List grpByAstExprs, ASTNode targetNode, + final boolean allSet) throws SemanticException { final MutableBoolean visited = new MutableBoolean(false); final MutableBoolean found = new MutableBoolean(false); @@ -3085,10 +3085,18 @@ public Object post(Object t) { for (int i = 0; i < grpByAstExprs.size(); i++) { ASTNode grpByExpr = grpByAstExprs.get(i); if (grpByExpr.toStringTree().equals(c.toStringTree())) { - ASTNode child1 = (ASTNode) ParseDriver.adaptor.create( - HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"); - ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create( - HiveParser.Identifier, VirtualColumn.GROUPINGID.getName())); + ASTNode child1; + if (allSet) { + // There is no grouping_id column, we create constant + child1 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral, + String.valueOf(IntMath.pow(2, grpByAstExprs.size()) - 1)); + } else { + // We refer to grouping_id column + child1 = (ASTNode) ParseDriver.adaptor.create( + HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"); + ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create( + HiveParser.Identifier, VirtualColumn.GROUPINGID.getName())); + } ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral, String.valueOf(IntMath.mod(-i-1, grpByAstExprs.size()))); root.setChild(1, child1); @@ -4296,10 +4304,11 @@ static boolean isRegex(String pattern, HiveConf conf) { // We allow stateful functions in the SELECT list (but nowhere else) tcCtx.setAllowStatefulFunctions(true); tcCtx.setAllowDistinctFunctions(false); - if (!isCBOExecuted() && cubeRollupGrpSetPresent) { + if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) { // If CBO did not optimize the query, we might need to replace grouping function // Special handling of grouping function - expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr); + expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr, + !cubeRollupGrpSetPresent); } ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); String recommended = recommendName(exp, colAlias); @@ -13139,18 +13148,17 @@ private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operato Operator genWindowingPlan(QB qb, WindowingSpec wSpec, Operator input) throws SemanticException { wSpec.validateAndMakeEffective(); - if (!isCBOExecuted()) { + if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) { // If CBO did not optimize the query, we might need to replace grouping function final String selClauseName = qb.getParseInfo().getClauseNames().iterator().next(); final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty() || !qb.getParseInfo().getDestGroupingSets().isEmpty() || !qb.getParseInfo().getDestCubes().isEmpty()); - if (cubeRollupGrpSetPresent) { - for (WindowExpressionSpec wExprSpec : wSpec.getWindowExpressions()) { - // Special handling of grouping function - wExprSpec.setExpression(rewriteGroupingFunctionAST( - getGroupByForClause(qb.getParseInfo(), selClauseName), wExprSpec.getExpression())); - } + for (WindowExpressionSpec wExprSpec : wSpec.getWindowExpressions()) { + // Special handling of grouping function + wExprSpec.setExpression(rewriteGroupingFunctionAST( + getGroupByForClause(qb.getParseInfo(), selClauseName), wExprSpec.getExpression(), + !cubeRollupGrpSetPresent)); } } diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q index 1b753e1..b1b91f9 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q @@ -87,3 +87,32 @@ from T1 group by cube(key, value) having grouping(key) = 1 OR grouping(value) = 1 order by x desc, case when x = 1 then key end; + +explain +select key, value, grouping(key), grouping(value) +from T1 +group by key, value; + +select key, value, grouping(key), grouping(value) +from T1 +group by key, value; + +explain +select key, value, grouping(value) +from T1 +group by key, value; + +select key, value, grouping(value) +from T1 +group by key, value; + +explain +select key, value +from T1 +group by key, value +having grouping(key) = 1; + +select key, value +from T1 +group by key, value +having grouping(key) = 1; diff --git ql/src/test/queries/clientpositive/groupby_grouping_window.q ql/src/test/queries/clientpositive/groupby_grouping_window.q index b456074..7ba73f9 100644 --- ql/src/test/queries/clientpositive/groupby_grouping_window.q +++ ql/src/test/queries/clientpositive/groupby_grouping_window.q @@ -13,3 +13,16 @@ FROM t GROUP BY category GROUPING SETS ((), (category)) HAVING max(comments) > 0; + +SELECT grouping(category), lead(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +GROUPING SETS ((), (category)); + +SELECT grouping(category), lead(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live; + +SELECT grouping(category), lag(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live; diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out index 62f40cd..9f341c7 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out @@ -765,3 +765,234 @@ NULL 2 1 2 NULL 1 3 NULL 1 4 NULL 1 +PREHOOK: query: explain +select key, value, grouping(key), grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(key), grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: tinyint), 1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(key), grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(key), grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 1 +1 1 1 1 +2 2 1 1 +3 NULL 1 1 +3 3 1 1 +4 5 1 1 +PREHOOK: query: explain +select key, value, grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, grouping(value) +from T1 +group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, grouping(value) +from T1 +group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 +1 1 1 +2 2 1 +3 NULL 1 +3 3 1 +4 5 1 +PREHOOK: query: explain +select key, value +from T1 +group by key, value +having grouping(key) = 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value +from T1 +group by key, value +having grouping(key) = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: int), value (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value +from T1 +group by key, value +having grouping(key) = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from T1 +group by key, value +having grouping(key) = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL +1 1 +2 2 +3 NULL +3 3 +4 5 diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 251f4f7..e6952bd 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -151,3 +151,51 @@ NULL 0 2 1 86 0 2 1 238 0 2 1 311 0 2 1 +PREHOOK: query: SELECT grouping(category), lead(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +GROUPING SETS ((), (category)) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT grouping(category), lead(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +GROUPING SETS ((), (category)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +0 NULL +0 NULL +0 NULL +0 NULL +PREHOOK: query: SELECT grouping(category), lead(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT grouping(category), lead(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1 0 +1 0 +1 NULL +PREHOOK: query: SELECT grouping(category), lag(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT grouping(category), lag(live) over(partition by grouping(category)) +FROM t +GROUP BY category, live +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1 NULL +1 0 +1 0