diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index ab7eee8..a147a45 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -42,7 +42,6 @@
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -889,16 +888,14 @@ private boolean checkMapSideAggregation(GroupByOperator gop,
       GroupByDesc.Mode mode = desc.getMode();
 
       if (mode.equals(GroupByDesc.Mode.HASH)) {
-        float hashAggMem = conf.getFloatVar(
-            HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
-        float hashAggMaxThreshold = conf.getFloatVar(
-            HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
-
-        // get memory for container. May be use mapreduce.map.java.opts instead?
-        long totalMemory =
-            DagUtils.getContainerResource(conf).getMemory() * 1000L * 1000L;
-        long maxMemHashAgg = Math
-            .round(totalMemory * hashAggMem * hashAggMaxThreshold);
+        float hashAggMem = conf.getFloatVar(HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
+        float hashAggMaxThreshold = conf.getFloatVar(HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
+
+        // get available map memory
+        long totalMemory = StatsUtils.getAvailableMemory(conf) * 1000L * 1000L;
+        // widen to double: long * float is a float, and Math.round(float) returns an int that
+        // is clamped at Integer.MAX_VALUE, which would silently cap this limit at ~2.1 GB
+        long maxMemHashAgg = Math.round((double) totalMemory * hashAggMem * hashAggMaxThreshold);
 
         // estimated number of rows will be product of NDVs
         long numEstimatedRows = 1;
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 5143fbe..eb46e32 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -29,6 +29,7 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -86,6 +87,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.tez.mapreduce.hadoop.MRJobConfig;
 
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
@@ -1350,4 +1352,23 @@ private static void getTableAliasFromExprNode(ExprNodeDesc end,
     }
 
   }
+
+  /**
+   * Looks up the amount of memory available to a task from the configuration.
+   *
+   * <p>A positive {@code HIVETEZCONTAINERSIZE} setting takes precedence. Otherwise
+   * {@code MRJobConfig.MAP_MEMORY_MB} is read, defaulting to
+   * {@code MRJobConfig.DEFAULT_MAP_MEMORY_MB} when that key is not set.
+   *
+   * @param conf configuration to read the settings from
+   * @return the configured memory size; megabytes for the map-memory fallback, and presumably
+   *         the same unit for the Tez container size (TODO confirm)
+   */
+  public static long getAvailableMemory(Configuration conf) {
+    int tezContainerSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE);
+    if (tezContainerSize > 0) {
+      return tezContainerSize;
+    }
+    return conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
+  }
 }