diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 2ffc130..7a42c93 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -277,7 +277,7 @@ "Grouping sets aggregations (with rollups or cubes) are not allowed if aggregation function " + "parameters overlap with the aggregation functions columns"), - HIVE_GROUPING_SETS_AGGR_NOFUNC(10211, + @Deprecated HIVE_GROUPING_SETS_AGGR_NOFUNC(10211, "Grouping sets aggregations are not allowed if no aggregation function is presented"), HIVE_UNION_REMOVE_OPTIMIZATION_NEEDS_SUBDIRECTORIES(10212, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index be561ce..6d6c608 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -63,9 +63,10 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import com.google.common.math.IntMath; + import javolution.util.FastBitSet; /** @@ -171,14 +172,18 @@ */ protected transient int numEntriesHashTable; - public static FastBitSet groupingSet2BitSet(int value) { + /** + * This method returns the big-endian representation of value: bit i of value (counting from the least significant bit) is stored at position (length-1-i) of the result. 
+ * @param value the grouping set value to convert + * @param length the number of low-order bits of value to convert, which is also the number of positions used in the result + * @return a bit set whose positions 0 to length-1 hold the converted bits + */ + public static FastBitSet groupingSet2BitSet(int value, int length) { FastBitSet bits = new FastBitSet(); - int index = 0; - while (value != 0) { + for (int index = length - 1; index >= 0; index--) { if (value % 2 != 0) { bits.set(index); } - ++index; value = value >>> 1; } return bits; @@ -231,7 +236,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { for (Integer groupingSet: groupingSets) { // Create the mapping corresponding to the grouping set newKeysGroupingSets[pos] = new IntWritable(groupingSet); - groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet); + groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet, groupingSetsPosition); pos++; } } @@ -770,8 +775,8 @@ public void process(Object row, int tag) throws HiveException { FastBitSet bitset = groupingSetsBitSet[groupingSetPos]; // Some keys need to be left to null corresponding to that grouping set. - for (int keyPos = bitset.nextSetBit(0); keyPos >= 0; - keyPos = bitset.nextSetBit(keyPos+1)) { + for (int keyPos = bitset.nextClearBit(0); keyPos < groupingSetsPosition; + keyPos = bitset.nextClearBit(keyPos+1)) { newKeysArray[keyPos] = cloneNewKeysArray[keyPos]; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index db8d46e..0f26fc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -250,6 +250,9 @@ import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; + +import javolution.util.FastBitSet; + import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; @@ -2621,7 +2624,7 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs Set setTransformedGroupSets = new 
HashSet(groupSets.size()); for(int val: groupSets) { - setTransformedGroupSets.add(convert(val)); + setTransformedGroupSets.add(convert(val, groupSet.cardinality())); } // Calcite expects the grouping sets sorted and without duplicates transformedGroupSets = new ArrayList(setTransformedGroupSets); @@ -2656,16 +2659,19 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs return aggregateRel; } - private ImmutableBitSet convert(int value) { + /* This method returns the flipped big-endian representation of value: bit i of value (counting from the least significant bit) is written to position (length-1-i), and positions 0 to length-1 are then inverted */ + private ImmutableBitSet convert(int value, int length) { BitSet bits = new BitSet(); - int index = 0; - while (value != 0L) { + for (int index = length - 1; index >= 0; index--) { if (value % 2 != 0) { bits.set(index); } - ++index; value = value >>> 1; } + // We flip the bits because Calcite considers that '1' + // means that the column participates in the GroupBy + // and '0' does not, as opposed to grouping_id. + bits.flip(0, length); return ImmutableBitSet.FROM_BIT_SET.apply(bits); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 0872e53..728a7dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -3918,7 +3918,7 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) { continue; } - int bitmap = 0; + int bitmap = IntMath.pow(2, groupByExpr.size()) - 1; for (int j = 0; j < child.getChildCount(); ++j) { String treeAsString = child.getChild(j).toStringTree(); Integer pos = exprPos.get(treeAsString); @@ -3927,30 +3927,22 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) generateErrorMessage((ASTNode) child.getChild(j), ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg())); } - bitmap = setBit(bitmap, pos); + bitmap = unsetBit(bitmap, 
groupByExpr.size() - pos - 1); } result.add(bitmap); } } - if (checkForNoAggr(result)) { - throw new SemanticException( - ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOFUNC.getMsg()); - } return result; } - private boolean checkForNoAggr(List bitmaps) { - boolean ret = true; - for (int mask : bitmaps) { - ret &= mask == 0; - } - return ret; - } - public static int setBit(int bitmap, int bitIdx) { return bitmap | (1 << bitIdx); } + public static int unsetBit(int bitmap, int bitIdx) { + return bitmap & ~(1 << bitIdx); + } + /** * This function is a wrapper of parseInfo.getGroupByForClause which * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 0486b68..fd70a2c 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -224,16 +224,16 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, v POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -1 11 3 1 +1 11 0 1 1 NULL 1 1 -2 12 3 1 +2 12 0 1 2 NULL 1 1 -3 13 3 1 +3 13 0 1 3 NULL 1 1 -7 17 3 1 +7 17 0 1 7 NULL 1 1 -8 18 3 1 -8 28 3 1 +8 18 0 1 +8 28 0 1 8 NULL 1 2 NULL 11 2 1 NULL 12 2 1 @@ -241,7 +241,7 @@ NULL 13 2 1 NULL 17 2 1 NULL 18 2 1 NULL 28 2 1 -NULL NULL 0 6 +NULL NULL 3 6 PREHOOK: query: EXPLAIN SELECT key, count(distinct val) FROM T1 GROUP BY key with cube PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby_grouping_id1.q.out ql/src/test/results/clientpositive/groupby_grouping_id1.q.out index 9ef7615..c2a0393 100644 --- ql/src/test/results/clientpositive/groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_id1.q.out @@ -22,7 +22,7 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cu POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was 
here #### -NULL NULL 0 +NULL NULL 3 NULL 11 2 NULL 12 2 NULL 13 2 @@ -30,16 +30,16 @@ NULL 17 2 NULL 18 2 NULL 28 2 1 NULL 1 -1 11 3 +1 11 0 2 NULL 1 -2 12 3 +2 12 0 3 NULL 1 -3 13 3 +3 13 0 7 NULL 1 -7 17 3 +7 17 0 8 NULL 1 -8 18 3 -8 28 3 +8 18 0 +8 28 0 PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -48,7 +48,7 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by cube(key, val) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 +NULL NULL 3 NULL 11 2 NULL 12 2 NULL 13 2 @@ -56,16 +56,16 @@ NULL 17 2 NULL 18 2 NULL 28 2 1 NULL 1 -1 11 3 +1 11 0 2 NULL 1 -2 12 3 +2 12 0 3 NULL 1 -3 13 3 +3 13 0 7 NULL 1 -7 17 3 +7 17 0 8 NULL 1 -8 18 3 -8 28 3 +8 18 0 +8 28 0 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -74,18 +74,18 @@ POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with ro POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -0 NULL NULL +3 NULL NULL 1 1 NULL -3 1 11 +0 1 11 1 2 NULL -3 2 12 +0 2 12 1 3 NULL -3 3 13 +0 3 13 1 7 NULL -3 7 17 +0 7 17 1 8 NULL -3 8 18 -3 8 28 +0 8 18 +0 8 28 PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -94,18 +94,18 @@ POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by rollup (key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -0 NULL NULL +3 NULL NULL 1 1 NULL -3 1 11 +0 1 11 1 2 NULL -3 2 12 +0 2 12 1 3 NULL -3 3 13 +0 3 13 1 7 NULL -3 7 17 +0 7 17 1 8 NULL -3 8 18 -3 8 28 +0 8 18 +0 8 28 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by 
key, val with cube PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -114,7 +114,7 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 +NULL NULL 3 3 NULL 11 2 2 NULL 12 2 2 NULL 13 2 2 @@ -122,16 +122,16 @@ NULL 17 2 2 NULL 18 2 2 NULL 28 2 2 1 NULL 1 1 -1 11 3 3 +1 11 0 0 2 NULL 1 1 -2 12 3 3 +2 12 0 0 3 NULL 1 1 -3 13 3 3 +3 13 0 0 7 NULL 1 1 -7 17 3 3 +7 17 0 0 8 NULL 1 1 -8 18 3 3 -8 28 3 3 +8 18 0 0 +8 28 0 0 PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by cube(key, val) PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -140,7 +140,7 @@ POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 +NULL NULL 3 3 NULL 11 2 2 NULL 12 2 2 NULL 13 2 2 @@ -148,13 +148,13 @@ NULL 17 2 2 NULL 18 2 2 NULL 28 2 2 1 NULL 1 1 -1 11 3 3 +1 11 0 0 2 NULL 1 1 -2 12 3 3 +2 12 0 0 3 NULL 1 1 -3 13 3 3 +3 13 0 0 7 NULL 1 1 -7 17 3 3 +7 17 0 0 8 NULL 1 1 -8 18 3 3 -8 28 3 3 +8 18 0 0 +8 28 0 0 diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out index 6917dba..7faf278 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out @@ -85,17 +85,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 +NULL NULL 3 1 1 +1 NULL 0 0 0 1 NULL 1 0 1 -1 NULL 3 1 1 -1 1 3 1 1 +1 1 0 0 0 2 NULL 1 0 1 -2 2 3 1 1 +2 2 0 0 0 +3 NULL 0 0 0 3 NULL 1 0 1 -3 NULL 3 1 1 -3 3 3 1 1 +3 3 0 0 0 4 NULL 1 0 1 -4 
5 3 1 1 +4 5 0 0 0 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -167,22 +167,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 NULL NULL 2 1 0 +NULL NULL 3 1 1 NULL 1 2 1 0 NULL 2 2 1 0 NULL 3 2 1 0 NULL 5 2 1 0 +1 NULL 0 0 0 1 NULL 1 0 1 -1 NULL 3 1 1 -1 1 3 1 1 +1 1 0 0 0 2 NULL 1 0 1 -2 2 3 1 1 +2 2 0 0 0 +3 NULL 0 0 0 3 NULL 1 0 1 -3 NULL 3 1 1 -3 3 3 1 1 +3 3 0 0 0 4 NULL 1 0 1 -4 5 3 1 1 +4 5 0 0 0 PREHOOK: query: explain select key, value from T1 @@ -262,16 +262,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### NULL NULL +NULL NULL NULL 1 NULL 2 NULL 3 NULL 5 -1 NULL -1 1 -2 2 -3 NULL -3 3 -4 5 PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ -376,17 +371,12 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -4 5 2 -3 3 2 -3 NULL 2 -2 2 2 -1 1 2 -1 NULL 2 -NULL 1 1 -NULL NULL 1 +NULL NULL 2 NULL 5 1 NULL 3 1 NULL 2 1 +NULL 1 1 +NULL NULL 1 1 NULL 1 2 NULL 1 3 NULL 1 @@ -462,17 +452,17 @@ group by rollup(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 +NULL NULL 3 1 1 +1 NULL 0 0 0 1 NULL 1 0 1 -1 NULL 3 1 1 -1 1 3 1 1 +1 1 0 0 0 2 NULL 1 0 1 -2 2 3 1 1 +2 2 0 0 0 +3 NULL 0 0 0 3 NULL 1 0 1 -3 NULL 3 1 1 -3 3 3 1 1 +3 3 0 0 0 4 NULL 1 0 1 -4 5 3 1 1 +4 5 0 0 0 PREHOOK: query: explain select key, value, `grouping__id`, grouping(key), grouping(value) from T1 @@ -544,22 +534,22 @@ group by cube(key, value) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -NULL NULL 0 0 0 NULL NULL 2 1 0 +NULL NULL 3 1 1 NULL 1 2 1 0 NULL 2 2 1 0 NULL 3 2 1 0 NULL 5 2 1 0 +1 NULL 0 0 0 1 NULL 1 0 1 -1 NULL 3 1 1 -1 1 3 1 1 +1 1 0 0 0 2 NULL 1 0 1 -2 2 3 1 1 +2 2 0 0 0 +3 NULL 
0 0 0 3 NULL 1 0 1 -3 NULL 3 1 1 -3 3 3 1 1 +3 3 0 0 0 4 NULL 1 0 1 -4 5 3 1 1 +4 5 0 0 0 PREHOOK: query: explain select key, value from T1 @@ -636,16 +626,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### NULL NULL +NULL NULL NULL 1 NULL 2 NULL 3 NULL 5 -1 NULL -1 1 -2 2 -3 NULL -3 3 -4 5 PREHOOK: query: explain select key, value, grouping(key)+grouping(value) as x from T1 @@ -750,17 +735,12 @@ order by x desc, case when x = 1 then key end POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### -4 5 2 -3 3 2 -3 NULL 2 -2 2 2 -1 1 2 -1 NULL 2 -NULL 1 1 -NULL NULL 1 +NULL NULL 2 NULL 5 1 NULL 3 1 NULL 2 1 +NULL 1 1 +NULL NULL 1 1 NULL 1 2 NULL 1 3 NULL 1 diff --git ql/src/test/results/clientpositive/groupby_grouping_window.q.out ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 202fad0..4fc36ed 100644 --- ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -168,7 +168,7 @@ POSTHOOK: Input: default@t 0 NULL 0 NULL 0 NULL -0 NULL +1 NULL PREHOOK: query: SELECT grouping(category), lead(live) over(partition by grouping(category)) FROM t GROUP BY category, live