diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java index 3ecbbb1..a892e23 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java @@ -662,6 +662,7 @@ private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws Sem ArrayList colInfoLst = new ArrayList(); GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0); + //inputOpAf is always group by here so startPos as 0 works ArrayList reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); int keyLength = reduceKeys.size(); @@ -703,8 +704,21 @@ private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws Seman String outputColName; // 1. Add GB Keys to reduce keys - ArrayList reduceKeys = getReduceKeysForRS(inputOpAf.inputs.get(0), 0, - gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); + ArrayList reduceKeys= new ArrayList(); + for (int i = 0; i < gbInfo.gbKeys.size(); i++) { + //gbInfo already has ExprNode for gbkeys + reduceKeys.add(gbInfo.gbKeys.get(i)); + //TODO: is this correct? + String colOutputName = SemanticAnalyzer.getColumnInternalName(i); + outputKeyColumnNames.add(colOutputName); + colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName, gbInfo.gbKeyTypes.get(i), "", false)); + colExprMap.put(colOutputName, gbInfo.gbKeys.get(i)); + } + + // GROUPING SETS are not allowed with map side aggregation set to false so we don't have to worry about it + + //ArrayList reduceKeys = getReduceKeysForRS(inputOpAf.inputs.get(0), 0, + // gbInfo.gbKeys.size() - 1, outputKeyColumnNames, false, colInfoLst, colExprMap, false, false); int keyLength = reduceKeys.size(); // 2. Add Dist UDAF args to reduce keys diff --git a/ql/src/test/queries/clientpositive/count.q b/ql/src/test/queries/clientpositive/count.q index bb84030..41ffaf2 100644 --- a/ql/src/test/queries/clientpositive/count.q +++ b/ql/src/test/queries/clientpositive/count.q @@ -18,3 +18,19 @@ select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; + +set hive.cbo.returnpath.hiveop=true; + +explain select count(distinct b) from abcd group by a; +select count(distinct b) from abcd group by a; + +explain select count(distinct b) from abcd group by b; +select count(distinct b) from abcd group by b; + +explain select count(distinct b) from abcd group by c; +select count(distinct b) from abcd group by c; + +explain select count(b), count(distinct c) from abcd group by d; +select count(b), count(distinct c) from abcd group by d; + +set hive.cbo.returnpath.hiveop=false; diff --git a/ql/src/test/results/clientpositive/count.q.out b/ql/src/test/results/clientpositive/count.q.out index 38928b7..962348c 100644 --- a/ql/src/test/results/clientpositive/count.q.out +++ b/ql/src/test/results/clientpositive/count.q.out @@ -264,3 +264,413 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: a, $f1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +1 +1 +1 +2 +PREHOOK: query: explain select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: b + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: ++ + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b, $f1 + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +PREHOOK: query: explain select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int), c (type: int) + outputColumnNames: b, c + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: c (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: c, $f1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), d (type: int) + outputColumnNames: c, d + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: d (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: d, $f1, $f2 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint), $f2 (type: bigint) + outputColumnNames: _o__c0, _o__c1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +PREHOOK: query: DROP TABLE IF EXISTS src +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src +PREHOOK: Output: default@src +POSTHOOK: query: DROP TABLE IF EXISTS src +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src +PREHOOK: query: DROP TABLE IF EXISTS src1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src1 +POSTHOOK: query: DROP TABLE IF EXISTS src1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src1 +PREHOOK: query: DROP TABLE IF EXISTS src_json +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_json +PREHOOK: Output: default@src_json +POSTHOOK: query: DROP TABLE IF EXISTS src_json +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_json +POSTHOOK: Output: default@src_json +PREHOOK: query: DROP TABLE IF EXISTS src_sequencefile +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_sequencefile +PREHOOK: Output: default@src_sequencefile +POSTHOOK: query: DROP TABLE IF EXISTS src_sequencefile +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_sequencefile +POSTHOOK: Output: default@src_sequencefile +PREHOOK: query: DROP TABLE IF EXISTS src_thrift +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_thrift +PREHOOK: Output: default@src_thrift +POSTHOOK: query: DROP TABLE IF EXISTS src_thrift +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_thrift +POSTHOOK: Output: default@src_thrift +PREHOOK: query: DROP TABLE IF EXISTS srcbucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket +PREHOOK: Output: default@srcbucket +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket +POSTHOOK: Output: default@srcbucket +PREHOOK: query: DROP TABLE IF EXISTS srcbucket2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket2 +PREHOOK: Output: default@srcbucket2 +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket2 +POSTHOOK: Output: default@srcbucket2 +PREHOOK: query: DROP TABLE IF EXISTS srcpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart +PREHOOK: Output: default@srcpart +POSTHOOK: query: DROP TABLE IF EXISTS srcpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart +POSTHOOK: Output: default@srcpart +PREHOOK: query: DROP TABLE IF EXISTS primitives +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS primitives +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_j1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_j1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS alltypesorc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc +POSTHOOK: query: DROP TABLE IF EXISTS alltypesorc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc +PREHOOK: query: DROP TABLE IF EXISTS cbo_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t1 +PREHOOK: Output: default@cbo_t1 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Output: default@cbo_t1 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t2 +PREHOOK: Output: default@cbo_t2 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Output: default@cbo_t2 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t3 +PREHOOK: Output: default@cbo_t3 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t3 +POSTHOOK: Output: default@cbo_t3 +PREHOOK: query: DROP TABLE IF EXISTS src_cbo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_cbo +PREHOOK: Output: default@src_cbo +POSTHOOK: query: DROP TABLE IF EXISTS src_cbo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@src_cbo +PREHOOK: query: DROP TABLE IF EXISTS part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part +PREHOOK: Output: default@part +POSTHOOK: query: DROP TABLE IF EXISTS part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE IF EXISTS lineitem +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@lineitem +PREHOOK: Output: default@lineitem +POSTHOOK: query: DROP TABLE IF EXISTS lineitem +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@lineitem +POSTHOOK: Output: default@lineitem diff --git a/ql/src/test/results/clientpositive/tez/count.q.out b/ql/src/test/results/clientpositive/tez/count.q.out index 656b688..1ca2fa9 100644 --- a/ql/src/test/results/clientpositive/tez/count.q.out +++ b/ql/src/test/results/clientpositive/tez/count.q.out @@ -292,3 +292,481 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int) + sort order: ++ + Map-reduce partition columns: a (type: int), b (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: a, b + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: int) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: a, $f1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +1 +1 +1 +2 +PREHOOK: query: explain select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int) + outputColumnNames: b + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: + + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 19 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int) + sort order: + + Map-reduce partition columns: b (type: int) + Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: b, $f1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +PREHOOK: query: explain select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: int), c (type: int) + outputColumnNames: b, c + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: b (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: b (type: int), c (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: b, c + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: c (type: int) + sort order: + + Map-reduce partition columns: c (type: int) + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: c, $f1 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint) + outputColumnNames: _o__c0 + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(distinct b) from abcd group by c +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct b) from abcd group by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 +1 +1 +1 +1 +1 +1 +PREHOOK: query: explain select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), d (type: int) + outputColumnNames: c, d + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d (type: int), c (type: int) + sort order: ++ + Map-reduce partition columns: d (type: int) + Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: d, $f1, $f2 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: $f1 (type: bigint), $f2 (type: bigint) + outputColumnNames: _o__c0, _o__c1 + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(b), count(distinct c) from abcd group by d +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(b), count(distinct c) from abcd group by d +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +PREHOOK: query: DROP TABLE IF EXISTS src +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src +PREHOOK: Output: default@src +POSTHOOK: query: DROP TABLE IF EXISTS src +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src +PREHOOK: query: DROP TABLE IF EXISTS src1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src1 +POSTHOOK: query: DROP TABLE IF EXISTS src1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src1 +PREHOOK: query: DROP TABLE IF EXISTS src_json +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_json +PREHOOK: Output: default@src_json +POSTHOOK: query: DROP TABLE IF EXISTS src_json +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_json +POSTHOOK: Output: default@src_json +PREHOOK: query: DROP TABLE IF EXISTS src_sequencefile +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_sequencefile +PREHOOK: Output: default@src_sequencefile +POSTHOOK: query: DROP TABLE IF EXISTS src_sequencefile +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_sequencefile +POSTHOOK: Output: default@src_sequencefile +PREHOOK: query: DROP TABLE IF EXISTS src_thrift +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_thrift +PREHOOK: Output: default@src_thrift +POSTHOOK: query: DROP TABLE IF EXISTS src_thrift +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_thrift +POSTHOOK: Output: default@src_thrift +PREHOOK: query: DROP TABLE IF EXISTS srcbucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket +PREHOOK: Output: default@srcbucket +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket +POSTHOOK: Output: default@srcbucket +PREHOOK: query: DROP TABLE IF EXISTS srcbucket2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcbucket2 +PREHOOK: Output: default@srcbucket2 +POSTHOOK: query: DROP TABLE IF EXISTS srcbucket2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcbucket2 +POSTHOOK: Output: default@srcbucket2 +PREHOOK: query: DROP TABLE IF EXISTS srcpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart +PREHOOK: Output: default@srcpart +POSTHOOK: query: DROP TABLE IF EXISTS srcpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart +POSTHOOK: Output: default@srcpart +PREHOOK: query: DROP TABLE IF EXISTS primitives +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS primitives +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest4_sequencefile +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_j1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_j1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS dest_g2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS dest_g2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS fetchtask_ioexception +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS alltypesorc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc +POSTHOOK: query: DROP TABLE IF EXISTS alltypesorc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc +PREHOOK: query: DROP TABLE IF EXISTS cbo_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t1 +PREHOOK: Output: default@cbo_t1 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t1 +POSTHOOK: Output: default@cbo_t1 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t2 +PREHOOK: Output: default@cbo_t2 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t2 +POSTHOOK: Output: default@cbo_t2 +PREHOOK: query: DROP TABLE IF EXISTS cbo_t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cbo_t3 +PREHOOK: Output: default@cbo_t3 +POSTHOOK: query: DROP TABLE IF EXISTS cbo_t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cbo_t3 +POSTHOOK: Output: default@cbo_t3 +PREHOOK: query: DROP TABLE IF EXISTS src_cbo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_cbo +PREHOOK: Output: default@src_cbo +POSTHOOK: query: DROP TABLE IF EXISTS src_cbo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_cbo +POSTHOOK: Output: default@src_cbo +PREHOOK: query: DROP TABLE IF EXISTS part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part +PREHOOK: Output: default@part +POSTHOOK: query: DROP TABLE IF EXISTS part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE IF EXISTS lineitem +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@lineitem +PREHOOK: Output: default@lineitem +POSTHOOK: query: DROP TABLE IF EXISTS lineitem +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@lineitem +POSTHOOK: Output: default@lineitem