diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 015baa1..6e072fc 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -783,7 +783,21 @@ private static void addDeprecatedKeys() { public static final String NM_VMEM_PMEM_RATIO = NM_PREFIX + "vmem-pmem-ratio"; public static final float DEFAULT_NM_VMEM_PMEM_RATIO = 2.1f; - + + /** Whether a container's memory limit is enforced only after total memory + * usage exceeds the allowed ratio of the memory allotted for containers. + */ + public static final String TOTAL_MEM_USAGE_CHECK_ENABLED = NM_PREFIX + + "total-mem-usage-check-enabled"; + public static final boolean DEFAULT_TOTAL_MEM_USAGE_CHECK_ENABLED = false; + + /** Fraction of the memory allotted for containers that total memory usage + * must exceed before a container's memory limit is enforced. + */ + public static final String TOTAL_MEM_USAGE_ALLOWED_RATIO = NM_PREFIX + + "total-mem-usage-allowed-ratio"; + public static final float DEFAULT_TOTAL_MEM_USAGE_ALLOWED_RATIO = 0.8f; + /** Number of Virtual CPU Cores which can be allocated for containers.*/ public static final String NM_VCORES = NM_PREFIX + "resource.cpu-vcores"; public static final int DEFAULT_NM_VCORES = 8; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 0ae4325..0b26df2 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -48,7 +48,6 @@ final static Log LOG = LogFactory .getLog(ContainersMonitorImpl.class); - private long monitoringInterval; private MonitoringThread monitoringThread; private boolean containerMetricsEnabled; @@ -72,6 +71,9 @@ private boolean pmemCheckEnabled; private boolean vmemCheckEnabled; + private boolean isTotalMachineCheckEnabled; + private double totalMemUsageAllowedRatio = 0.8; /* both fields are assigned from the configuration in serviceInit */ + private long maxVCoresAllottedForContainers; private static final long UNKNOWN_MEMORY_LIMIT = -1L; @@ -123,6 +125,14 @@ protected void serviceInit(Configuration conf) throws Exception { YarnConfiguration.NM_VCORES, YarnConfiguration.DEFAULT_NM_VCORES); + isTotalMachineCheckEnabled = conf.getBoolean( + YarnConfiguration.TOTAL_MEM_USAGE_CHECK_ENABLED, + YarnConfiguration.DEFAULT_TOTAL_MEM_USAGE_CHECK_ENABLED); + + totalMemUsageAllowedRatio = conf.getDouble( + YarnConfiguration.TOTAL_MEM_USAGE_ALLOWED_RATIO, YarnConfiguration + .DEFAULT_TOTAL_MEM_USAGE_ALLOWED_RATIO); + // Setting these irrespective of whether checks are enabled. Required in // the UI. @@ -333,6 +343,9 @@ public MonitoringThread() { @Override public void run() { + long prevVmemStillInUsage = 0; + long prevPmemStillInUsage = 0; /* hold vmemStillInUsage/pmemStillInUsage from the preceding pass; 0 until one completes */ + while (true) { // Print the processTrees for debugging. 
@@ -434,6 +447,8 @@ public void run() { String msg = ""; int containerExitStatus = ContainerExitStatus.INVALID; if (isVmemCheckEnabled() + && isTotalLimitRatioExceeded(prevVmemStillInUsage, + maxVmemAllottedForContainers) && isProcessTreeOverLimit(containerId.toString(), currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) { // Container (the root process) is still alive and overflowing @@ -446,6 +461,8 @@ public void run() { isMemoryOverLimit = true; containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM; } else if (isPmemCheckEnabled() + && isTotalLimitRatioExceeded(prevPmemStillInUsage, + maxPmemAllottedForContainers) && isProcessTreeOverLimit(containerId.toString(), currentPmemUsage, curRssMemUsageOfAgedProcesses, pmemLimit)) { @@ -490,6 +507,9 @@ public void run() { } } + prevVmemStillInUsage = vmemStillInUsage; + prevPmemStillInUsage = pmemStillInUsage; + try { Thread.sleep(monitoringInterval); } catch (InterruptedException e) { @@ -561,6 +581,18 @@ public boolean isVmemCheckEnabled() { return this.vmemCheckEnabled; } + /** + * Tells whether the total-usage gate lets a memory limit be enforced. + * @param memUsage usage to test; run() passes the previous pass's total + * @param maxMemAllottedForContainers maximum allowed for containers + * @return true if the check is disabled or memUsage exceeds ratio * maximum + */ + public boolean isTotalLimitRatioExceeded(long memUsage, + long maxMemAllottedForContainers) { + return !isTotalMachineCheckEnabled || + memUsage > totalMemUsageAllowedRatio * maxMemAllottedForContainers; + } + @Override public void handle(ContainersMonitorEvent monitoringEvent) {