diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 98f9206..f03cd60 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -862,14 +862,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + "common group by keys, it will be optimized to generate single M/R job."), - HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false, + HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true, "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + "the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this\n" + "is that it limits the number of mappers to the number of files."), - HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false, - "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + - "the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan\n" + - "is not converted, but a query property is set to denote the same."), HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false, "Whether to enable using Column Position Alias in Group By or Order By"), HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30, diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index f758776..fe459f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -212,11 +212,7 @@ else if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW)) { convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth); } else if (optimizeDistincts && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { - // In test mode, dont change the query plan. However, setup a query property pGraphContext.getQueryProperties().setHasMapGroupBy(true); - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) { - return; - } ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator)groupByOp.getChildOperators().get(0); GroupByDesc childGroupByDesc = @@ -518,11 +514,7 @@ private GroupByOptimizerSortMatch matchBucketSortCols( // The operators specified by depth and removed from the tree. protected void convertGroupByMapSideSortedGroupBy( HiveConf conf, GroupByOperator groupByOp, int depth) { - // In test mode, dont change the query plan. However, setup a query property pGraphContext.getQueryProperties().setHasMapGroupBy(true); - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) { - return; - } if (removeChildren(groupByOp, depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file diff --git ql/src/test/queries/clientpositive/groupby_sort_8.q ql/src/test/queries/clientpositive/groupby_sort_8.q index f53295e..f0d3a59 100644 --- ql/src/test/queries/clientpositive/groupby_sort_8.q +++ ql/src/test/queries/clientpositive/groupby_sort_8.q @@ -18,10 +18,4 @@ EXPLAIN select count(distinct key) from T1; select count(distinct key) from T1; -set hive.map.groupby.sorted.testmode=true; --- In testmode, the plan is not changed -EXPLAIN -select count(distinct key) from T1; -select count(distinct key) from T1; - DROP TABLE T1; diff --git ql/src/test/queries/clientpositive/groupby_sort_test_1.q ql/src/test/queries/clientpositive/groupby_sort_test_1.q index 4ec138e..70eef33 100644 --- ql/src/test/queries/clientpositive/groupby_sort_test_1.q +++ ql/src/test/queries/clientpositive/groupby_sort_test_1.q @@ -2,7 +2,6 @@ set hive.enforce.bucketing = true; set hive.enforce.sorting = true; set hive.exec.reducers.max = 10; set hive.map.groupby.sorted=true; -set hive.map.groupby.sorted.testmode=true; CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; diff --git ql/src/test/results/clientpositive/groupby_sort_8.q.out ql/src/test/results/clientpositive/groupby_sort_8.q.out index 5152385..5d8f513 100644 --- ql/src/test/results/clientpositive/groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/groupby_sort_8.q.out @@ -101,70 +101,6 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@ds=1 #### A masked pattern was here #### 5 -PREHOOK: query: -- In testmode, the plan is not changed -EXPLAIN -select count(distinct key) from T1 -PREHOOK: type: QUERY -POSTHOOK: query: -- In testmode, the plan is not changed -EXPLAIN -select count(distinct key) from T1 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT key) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(distinct key) from T1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t1@ds=1 -#### A masked pattern was here #### -POSTHOOK: query: select count(distinct key) from T1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t1@ds=1 -#### A masked pattern was here #### -5 PREHOOK: query: DROP TABLE T1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@t1