diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index 3ecbbb1..25fe059 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -703,8 +703,18 @@ private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws Seman String outputColName; // 1. Add GB Keys to reduce keys - ArrayList reduceKeys = getReduceKeysForRS(inputOpAf.inputs.get(0), 0, - gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); + ArrayList reduceKeys= new ArrayList(); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + //gbInfo already has ExprNode for gbkeys + reduceKeys.add(gbInfo.gbKeys.get(i)); + String colOutputName = SemanticAnalyzer.getColumnInternalName(i); + outputKeyColumnNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName, gbInfo.gbKeyTypes.get(i), "", false)); + colExprMap.put(colOutputName, gbInfo.gbKeys.get(i)); + } + + // Note: GROUPING SETS are not allowed with map side aggregation set to false so we don't have to worry about it + int keyLength = reduceKeys.size(); // 2. Add Dist UDAF args to reduce keys @@ -1002,7 +1012,7 @@ private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, int udafColStartPosInOriginalGB = gbInfo.gbKeys.size(); // the positions in rsColInfoLst are as follows // --grpkey--,--distkey--,--values-- - // but distUDAF may be before/after some non-distUDAF, + // but distUDAF may be before/after some non-distUDAF, // i.e., their positions can be mixed. // so we first process distUDAF and then non-distUDAF. // But we need to remember the sequence of udafs. diff --git a/ql/src/test/queries/clientpositive/count.q b/ql/src/test/queries/clientpositive/count.q index bb84030..41ffaf2 100644 --- a/ql/src/test/queries/clientpositive/count.q +++ b/ql/src/test/queries/clientpositive/count.q @@ -18,3 +18,19 @@ select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; + +set hive.cbo.returnpath.hiveop=true; + +explain select count(distinct b) from abcd group by a; +select count(distinct b) from abcd group by a; + +explain select count(distinct b) from abcd group by b; +select count(distinct b) from abcd group by b; + +explain select count(distinct b) from abcd group by c; +select count(distinct b) from abcd group by c; + +explain select count(b), count(distinct c) from abcd group by d; +select count(b), count(distinct c) from abcd group by d; + +set hive.cbo.returnpath.hiveop=false; diff --git a/ql/src/test/results/clientpositive/count.q.out b/ql/src/test/results/clientpositive/count.q.out index 38928b7..c950c5b 100644 --- a/ql/src/test/results/clientpositive/count.q.out +++ b/ql/src/test/results/clientpositive/count.q.out @@ -264,3 +264,253 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: a, $f1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +1 +1 +1 +2 +PREHOOK: query: explain select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: b + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: ++ + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b, $f1 + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +PREHOOK: query: explain select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int), c (type: int) + outputColumnNames: b, c + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: c (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: c, $f1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), d (type: int) + outputColumnNames: c, d + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: d (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: d, $f1, $f2 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint), $f2 (type: bigint) + outputColumnNames: _o__c0, _o__c1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out index 9aefba7..b1ad662 100644 --- a/ql/src/test/results/clientpositive/spark/count.q.out +++ b/ql/src/test/results/clientpositive/spark/count.q.out @@ -288,3 +288,317 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: a (type: int), b (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: a, b + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: int) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: a, $f1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +1 +1 +1 +2 +PREHOOK: query: explain select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: b + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: + + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: + + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b, $f1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +PREHOOK: query: explain select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int), c (type: int) + outputColumnNames: b, c + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: b (type: int), c (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: b, c + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c (type: int) + sort order: + + Map-reduce partition columns: c (type: int) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: c, $f1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), d (type: int) + outputColumnNames: c, d + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: d (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: d, $f1, $f2 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint), $f2 (type: bigint) + outputColumnNames: _o__c0, _o__c1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 diff --git a/ql/src/test/results/clientpositive/tez/count.q.out b/ql/src/test/results/clientpositive/tez/count.q.out index 656b688..9fc2c75 100644 --- a/ql/src/test/results/clientpositive/tez/count.q.out +++ b/ql/src/test/results/clientpositive/tez/count.q.out @@ -292,3 +292,321 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: a (type: int), b (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: a, b + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: int) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: a, $f1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +1 +1 +1 +2 +PREHOOK: query: explain select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: b + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: + + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: + + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b, $f1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +PREHOOK: query: explain select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int), c (type: int) + outputColumnNames: b, c + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: b (type: int), c (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: b, c + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c (type: int) + sort order: + + Map-reduce partition columns: c (type: int) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: c, $f1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), d (type: int) + outputColumnNames: c, d + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: d (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: d, $f1, $f2 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint), $f2 (type: bigint) + outputColumnNames: _o__c0, _o__c1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1