diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 038487f134..731a8ac55c 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -173,6 +173,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ groupby1.q,\ groupby2.q,\ groupby3.q,\ + groupby_rollup_empty.q,\ having.q,\ identity_project_remove_skip.q,\ insert1.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index d3dfd21bbc..584b28c0aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -67,8 +67,6 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; -import com.google.common.math.IntMath; - import javolution.util.FastBitSet; /** @@ -209,7 +207,6 @@ protected void initializeOp(Configuration hconf) throws HiveException { heartbeatInterval = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVESENDHEARTBEAT); countAfterReport = 0; - groupingSetsPresent = conf.isGroupingSetsPresent(); ObjectInspector rowInspector = inputObjInspectors[0]; // init keyFields @@ -228,9 +225,10 @@ protected void initializeOp(Configuration hconf) throws HiveException { // Initialize the constants for the grouping sets, so that they can be re-used for // each row + groupingSetsPresent = conf.isGroupingSetsPresent(); + groupingSets = conf.getListGroupingSets(); + groupingSetsPosition = conf.getGroupingSetPosition(); if (groupingSetsPresent) { - groupingSets = conf.getListGroupingSets(); - groupingSetsPosition = conf.getGroupingSetPosition(); newKeysGroupingSets = new IntWritable[groupingSets.size()]; groupingSetsBitSet = new FastBitSet[groupingSets.size()]; @@ -1096,7 +1094,7 @@ public void closeOp(boolean abort) throws HiveException { if (!abort) { try { // If there is no grouping 
key and no row came to this operator - if (firstRow && (keyFields.length == 0)) { + if (firstRow && isEmptyGroupingSetPresent()) { firstRow = false; // There is no grouping key - simulate a null row @@ -1119,8 +1117,11 @@ public void closeOp(boolean abort) throws HiveException { aggregationEvaluators[ai].aggregate(aggregations[ai], o); } - // create dummy keys - size 0 - forward(new Object[0], aggregations); + Object[] keys=new Object[outputKeyLength]; + if (groupingSetsPosition >= 0) { + keys[groupingSetsPosition] = new IntWritable((1 << groupingSetsPosition) - 1); + } + forward(keys, aggregations); } else { flush(); } @@ -1179,4 +1180,20 @@ public boolean acceptLimitPushdown() { return getConf().getMode() == GroupByDesc.Mode.MERGEPARTIAL || getConf().getMode() == GroupByDesc.Mode.COMPLETE; } + + public boolean isEmptyGroupingSetPresent() { + if (keyFields.length == 0) { + return true; + } + // groupingSets are known at both the map and the reducer side, but the real processing + // has to be done only at the map side; hence groupingSetsPresent is true only there + if (groupingSetsPosition >= 0 && groupingSets != null) { + Integer emptyGrouping = (1 << groupingSetsPosition) - 1; + if (groupingSets.contains(emptyGrouping)) { + return true; + } + } + return false; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 489a3b60c5..2b2e4ff0b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -70,7 +70,7 @@ private ArrayList keys; private List listGroupingSets; private boolean groupingSetsPresent; - private int groupingSetPosition = -1; + private int groupingSetPosition = -1; // in case of grouping sets, groupby1 will output values for every grouping set; this is the index of the column in which that information will be sent private ArrayList aggregators; private ArrayList outputColumnNames; private float groupByMemoryUsage; @@ -233,7 
+233,7 @@ public boolean isAggregate() { } return false; } - + @Explain(displayName = "bucketGroup", displayOnlyOnTrue = true) public boolean getBucketGroup() { return bucketGroup; diff --git ql/src/test/queries/clientpositive/groupby_rollup_empty.q ql/src/test/queries/clientpositive/groupby_rollup_empty.q new file mode 100644 index 0000000000..f645fda9ef --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_rollup_empty.q @@ -0,0 +1,39 @@ +drop table if exists tx1; +create table tx1 (a integer,b integer,c integer); + +select sum(c) +from tx1 +; + +select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by a,b grouping sets ((), b, a); + +select sum(c), + grouping(b), + 'no rows' as expected +from tx1 +where a<0 +group by b grouping sets (b); + +select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b); + +-- non-empty table + +insert into tx1 values (1,1,1); + +select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b); + diff --git ql/src/test/results/clientpositive/groupby_rollup_empty.q.out ql/src/test/results/clientpositive/groupby_rollup_empty.q.out new file mode 100644 index 0000000000..849f3db60a --- /dev/null +++ ql/src/test/results/clientpositive/groupby_rollup_empty.q.out @@ -0,0 +1,107 @@ +PREHOOK: query: drop table if exists tx1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tx1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tx1 (a integer,b integer,c integer) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tx1 +POSTHOOK: query: create table tx1 (a integer,b integer,c integer) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tx1 +PREHOOK: query: select sum(c) +from tx1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c) +from tx1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@tx1 +#### A masked pattern was here #### +NULL +PREHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by a,b grouping sets ((), b, a) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by a,b grouping sets ((), b, a) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL 1 NULL,1 +PREHOOK: query: select sum(c), + grouping(b), + 'no rows' as expected +from tx1 +where a<0 +group by b grouping sets (b) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'no rows' as expected +from tx1 +where a<0 +group by b grouping sets (b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +PREHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL 1 NULL,1 +PREHOOK: query: insert into tx1 values (1,1,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@tx1 +POSTHOOK: query: insert into tx1 values (1,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tx1 +POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tx1.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tx1.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: 
select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL 1 NULL,1 diff --git ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out new file mode 100644 index 0000000000..7da09805f1 --- /dev/null +++ ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out @@ -0,0 +1,116 @@ +PREHOOK: query: drop table if exists tx1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists tx1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tx1 (a integer,b integer,c integer) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tx1 +POSTHOOK: query: create table tx1 (a integer,b integer,c integer) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tx1 +PREHOOK: query: select sum(c) +from tx1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c) +from tx1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL +PREHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by a,b grouping sets ((), b, a) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by a,b grouping sets ((), b, a) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL 1 NULL,1 +NULL 1 NULL,1 !!! these +NULL 1 NULL,1 !!! should +NULL 1 NULL,1 !!! 
not be here :) +PREHOOK: query: select sum(c), + grouping(b), + 'no rows' as expected +from tx1 +where a<0 +group by b grouping sets (b) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'no rows' as expected +from tx1 +where a<0 +group by b grouping sets (b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +PREHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL 1 NULL,1 +NULL 1 NULL,1 +NULL 1 NULL,1 +NULL 1 NULL,1 +PREHOOK: query: insert into tx1 values (1,1,1) +PREHOOK: type: QUERY +PREHOOK: Output: default@tx1 +POSTHOOK: query: insert into tx1 values (1,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tx1 +POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tx1.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tx1.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx1 +#### A masked pattern was here #### +POSTHOOK: query: select sum(c), + grouping(b), + 'NULL,1' as expected +from tx1 +where a<0 +group by rollup (b) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx1 +#### A masked pattern was here #### +NULL 1 NULL,1 +NULL 1 NULL,1 +NULL 1 NULL,1 +NULL 1 NULL,1