diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 975e108..91fb6c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -3130,7 +3130,7 @@ public static int estimateNumberOfReducers(HiveConf conf, ContentSummary inputSu public static int estimateReducers(long totalInputFileSize, long bytesPerReducer, int maxReducers, boolean powersOfTwo) { - int reducers = (int) ((totalInputFileSize + bytesPerReducer - 1) / bytesPerReducer); + int reducers = (int) ((Math.max(totalInputFileSize, bytesPerReducer)) / bytesPerReducer); reducers = Math.max(1, reducers); reducers = Math.min(maxReducers, reducers); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java index fef2c29..ccccadf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetReducerParallelism.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory; import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; @@ -88,6 +89,7 @@ public Object process(Node nd, Stack stack, } } + numberOfBytes = StatsRulesProcFactory.setMaxIfInvalid(numberOfBytes); int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer, maxReducers, false); LOG.info("Set parallelism for reduce sink "+sink+" to: "+numReducers); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index d49eddb..6167b8f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -957,6 +957,8 @@ private boolean checkMapSideAggregation(GroupByOperator gop, // estimated hash table size long estHashTableSize = numEstimatedRows * hashEntrySize; + + estHashTableSize = setMaxIfInvalid(estHashTableSize); if (estHashTableSize < maxMemHashAgg) { return true; @@ -1860,7 +1862,7 @@ static boolean satisfyPrecondition(Statistics stats) { * @param val - input value * @return Long.MAX_VALUE if val is negative else val */ - static long setMaxIfInvalid(long val) { + public static long setMaxIfInvalid(long val) { return val < 0 ? Long.MAX_VALUE : val; } }