diff --git common/src/java/org/apache/hive/common/util/HiveStringUtils.java common/src/java/org/apache/hive/common/util/HiveStringUtils.java index bba14e2..b8b0f45 100644 --- common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -38,6 +38,7 @@ import java.util.Locale; import java.util.Properties; import java.util.StringTokenizer; +import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.collect.Interner; @@ -975,4 +976,47 @@ public static String getPartitionValWithInvalidCharacter(List partVals, return null; } + + /** + * Parse a Java opts string, try to find the rightmost -Xmx option value (since there may be more than one) + * @param javaOpts Java opts string to parse + * @return the rightmost -Xmx value in bytes. If Xmx is not set, return -1 + */ + public static long parseRightmostXmx(String javaOpts) { + // Find the last matching -Xmx following word boundaries + // Format: -Xmx[g|G|m|M|k|K] + Pattern JAVA_OPTS_XMX_PATTERN = Pattern.compile(".*(?:^|\\s)-Xmx(\\d+)([gGmMkK]?)(?:$|\\s).*"); + Matcher m = JAVA_OPTS_XMX_PATTERN.matcher(javaOpts); + + if (m.matches()) { + long size = Long.parseLong(m.group(1)); + if (size <= 0) { + return -1; + } + + if (m.group(2).isEmpty()) { + // -Xmx specified in bytes + return size; + } + + char unit = m.group(2).charAt(0); + switch (unit) { + case 'k': + case 'K': + // -Xmx specified in KB + return size * 1024; + case 'm': + case 'M': + // -Xmx specified in MB + return size * 1024 * 1024; + case 'g': + case 'G': + // -Xmx speficied in GB + return size * 1024 * 1024 * 1024; + } + } + + // -Xmx not specified + return -1; + } } diff --git common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java index 6bd7037..bf1cd74 100644 --- common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java +++ common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java @@ -61,4 +61,52 @@ public void splitAndUnEscapeTestCase(String testValue, String[] expectedResults) assertTrue(Arrays.toString(expectedResults) + " == " + Arrays.toString(testResults), Arrays.equals(expectedResults, testResults)); } + + @Test + public void testParseRightmostXmx() throws Exception { + // Empty java opts + String javaOpts = ""; + long heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", -1, heapSize); + + // Non-empty java opts without -Xmx specified + javaOpts = "-Xms1024m"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", -1, heapSize); + + // Non-empty java opts with -Xmx specified in GB + javaOpts = "-Xms1024m -Xmx2g"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", 2147483648L, heapSize); + + // Non-empty java opts with -Xmx specified in MB + javaOpts = "-Xms1024m -Xmx1024m"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", 1073741824, heapSize); + + // Non-empty java opts with -Xmx specified in KB + javaOpts = "-Xms1024m -Xmx524288k"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", 536870912, heapSize); + + // Non-empty java opts with -Xmx specified in B + javaOpts = "-Xms1024m -Xmx1610612736"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", 1610612736, heapSize); + + // Non-empty java opts with -Xmx specified twice + javaOpts = "-Xmx1024m -Xmx1536m"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", 1610612736, heapSize); + + // Non-empty java opts with bad -Xmx specification + javaOpts = "pre-Xmx1024m"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", -1, heapSize); + + // Non-empty java opts with bad -Xmx specification + javaOpts = "-Xmx1024m-post"; + heapSize = HiveStringUtils.parseRightmostXmx(javaOpts); + assertEquals("Unexpected maximum heap size", -1, heapSize); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 2b93e01..48806bf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -59,9 +59,12 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.tez.dag.api.TezConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hive.common.util.HiveStringUtils.parseRightmostXmx; + /** * ConvertJoinMapJoin is an optimization that replaces a common join * (aka shuffle join) with a map join (aka broadcast or fragment replicate @@ -88,6 +91,15 @@ JoinOperator joinOp = (JoinOperator) nd; long maxSize = context.conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + LOG.info("Memory requested for conversion: " + maxSize); + // When the requested noconditional task size is bigger than the actual memory available from Tez, + // we don't want to take the risk of converting it to MapJoin and having potential OOM later + if (maxSize >= calcActualMemoryThreshold(context.conf)) { + LOG.warn("hive.auto.convert.join.noconditionaltask.size is greater than the actual memory" + + " available! Cannot convert a join into map join."); + return null; + //TODO when Tez support is available, we can ask for the amount of memory we need + } TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf); if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { @@ -885,4 +897,42 @@ private void fallbackToReduceSideJoin(JoinOperator joinOp, OptimizeTezProcContex LOG.info("Fallback to common merge join operator"); convertJoinSMBJoin(joinOp, context, pos, 0, false); } + + /** + * Calculate the actual available memory assigned from Tez side + * @return the actual memory threshold + */ + private long calcActualMemoryThreshold(HiveConf conf) { + long actualMemoryThreshold = -1; + long containerSize = conf.getLongVar(HiveConf.ConfVars.HIVETEZCONTAINERSIZE); + assert (containerSize > 0); + + double tezHeapFraction = conf.getDouble(TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION, + TezConfiguration.TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION_DEFAULT); + double tezReserveFraction = conf.getDouble(TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION, + TezConfiguration.TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION_DEFAULT); + + /* Find the rightmost -Xmx param among multiple Java options. Why rightmost? + $ java -Xmx1G -XX:+PrintFlagsFinal -Xmx2G 2>/dev/null | grep MaxHeapSize + uintx MaxHeapSize := 2147483648 {product} + */ + String hiveTezOpts = conf.getVar(HiveConf.ConfVars.HIVETEZJAVAOPTS); + String tezClusterDefaultOpts = conf.get(TezConfiguration.TEZ_TASK_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS, + TezConfiguration.TEZ_TASK_LAUNCH_CLUSTER_DEFAULT_CMD_OPTS_DEFAULT); + String tezOpts = conf.get(TezConfiguration.TEZ_TASK_LAUNCH_CMD_OPTS, + TezConfiguration.TEZ_TASK_LAUNCH_CMD_OPTS_DEFAULT); + + // Merge the above three Java Opts strings into one + String javaOpts = hiveTezOpts + " " + tezClusterDefaultOpts + " " + tezOpts; + long xmx = parseRightmostXmx(javaOpts); + + if (xmx <= 0) { + actualMemoryThreshold = (long) (tezHeapFraction * containerSize); + } else { + actualMemoryThreshold = (long) (tezReserveFraction * xmx); + } + + LOG.info("Actual memory allocated: " + actualMemoryThreshold); + return actualMemoryThreshold; + } }