Index: conf/hive-default.xml.template =================================================================== --- conf/hive-default.xml.template (revision 1463842) +++ conf/hive-default.xml.template (working copy) @@ -534,6 +534,15 @@ + hive.map.groupby.sorted.testmode + false + If the bucketing/sorting properties of the table exactly match the grouping key, whether to + perform the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan + is not converted, but a query property is set to record that the query qualified for the conversion. + + + + hive.new.job.grouping.set.cardinality 30 Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1463842) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -423,6 +423,7 @@ HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5), HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true), HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false), + HIVE_MAP_GROUPBY_SORT_TESTMODE("hive.map.groupby.sorted.testmode", false), HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false), HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30), @@ -765,7 +766,7 @@ // ptf partition constants HIVE_PTF_PARTITION_PERSISTENCE_CLASS("hive.ptf.partition.persistence", "org.apache.hadoop.hive.ql.exec.PTFPersistence$PartitionedByteBasedList"), - HIVE_PTF_PARTITION_PERSISTENT_SIZE("hive.ptf.partition.persistence.memsize", + HIVE_PTF_PARTITION_PERSISTENT_SIZE("hive.ptf.partition.persistence.memsize", (int) Math.pow(2, (5 + 10 + 10)) ), // 32MB ; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (revision 1463842) +++ 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (working copy) @@ -178,7 +178,7 @@ // Dont remove the operator for distincts if (useMapperSort && !groupByOp.getConf().isDistinct() && (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) { - convertGroupByMapSideSortedGroupBy(groupByOp, depth); + convertGroupByMapSideSortedGroupBy(hiveConf, groupByOp, depth); } else if ((match == GroupByOptimizerSortMatch.PARTIAL_MATCH) || (match == GroupByOptimizerSortMatch.COMPLETE_MATCH)) { @@ -455,7 +455,14 @@ // Convert the group by to a map-side group by // The operators specified by depth and removed from the tree. - protected void convertGroupByMapSideSortedGroupBy(GroupByOperator groupByOp, int depth) { + protected void convertGroupByMapSideSortedGroupBy( + HiveConf conf, GroupByOperator groupByOp, int depth) { + // Always set the query property; in test mode, return without changing the query plan. + pGraphContext.getQueryProperties().setHasMapGroupBy(true); + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT_TESTMODE)) { + return; + } + if (groupByOp.removeChildren(depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file groupByOp.setUseBucketizedHiveInputFormat(true); Index: ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java (revision 1463842) +++ ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java (working copy) @@ -46,6 +46,7 @@ boolean hasDistributeBy = false; boolean hasClusterBy = false; boolean mapJoinRemoved = false; + boolean hasMapGroupBy = false; public boolean hasJoin() { return hasJoin; @@ -134,4 +135,12 @@ public void setMapJoinRemoved(boolean mapJoinRemoved) { this.mapJoinRemoved = mapJoinRemoved; } + + public boolean isHasMapGroupBy() { + return hasMapGroupBy; + } + + public void setHasMapGroupBy(boolean hasMapGroupBy) { + 
this.hasMapGroupBy = hasMapGroupBy; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1463842) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -352,7 +352,8 @@ listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, - reduceSinkOperatorsAddedByEnforceBucketingSorting); + reduceSinkOperatorsAddedByEnforceBucketingSorting, + queryProperties); } @SuppressWarnings("nls") @@ -8660,7 +8661,7 @@ listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, - reduceSinkOperatorsAddedByEnforceBucketingSorting); + reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); // Generate table access stats if required if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS) == true) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 1463842) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -113,6 +114,7 @@ private List> rootTasks; private FetchTask fetchTask; + private QueryProperties queryProperties; public ParseContext() { } @@ -180,7 +182,8 @@ HashSet semanticInputs, List> 
rootTasks, Map> opToPartToSkewedPruner, Map viewAliasToInput, - List reduceSinkOperatorsAddedByEnforceBucketingSorting) { + List reduceSinkOperatorsAddedByEnforceBucketingSorting, + QueryProperties queryProperties) { this.conf = conf; this.qb = qb; this.ast = ast; @@ -212,6 +215,7 @@ this.viewAliasToInput = viewAliasToInput; this.reduceSinkOperatorsAddedByEnforceBucketingSorting = reduceSinkOperatorsAddedByEnforceBucketingSorting; + this.queryProperties = queryProperties; } /** @@ -623,4 +627,12 @@ public Map getViewAliasToInput() { return viewAliasToInput; } + + public QueryProperties getQueryProperties() { + return queryProperties; + } + + public void setQueryProperties(QueryProperties queryProperties) { + this.queryProperties = queryProperties; + } }