Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1656844) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -684,7 +684,7 @@ HIVEALIAS("hive.alias", "", ""), HIVEMAPSIDEAGGREGATE("hive.map.aggr", true, "Whether to use map-side aggregation in Hive Group By queries"), HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"), - HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS("hive.optimize.multigroupby.common.distincts", true, + HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS("hive.optimize.multigroupby.common.distincts", false, "Whether to optimize a multi-groupby query with the same distinct.\n" + "Consider a query like:\n" + "\n" + Index: ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out (revision 1656844) +++ ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out (working copy) @@ -30,12 +30,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -44,55 +43,42 @@ TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), (key + key) (type: double) - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col1 (type: double) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT value) + keys: (key + key) (type: double), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) - mode: final + mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -118,24 +104,23 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) - mode: final + mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -161,7 +146,7 @@ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-5 Stats-Aggr Operator PREHOOK: query: from src