diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java index 752c3a6..b47edbe 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupElasticMemoryController.java @@ -94,6 +94,7 @@ boolean controlVirtual = controlVirtualMemory && !controlPhysicalMemory; Runnable oomHandlerTemp = getDefaultOOMHandler(conf, context, oomHandlerOverride, controlVirtual); + LOG.info("Using OOMHandler: " + oomHandlerTemp.getClass().getName()); if (controlPhysicalMemory && controlVirtualMemory) { LOG.warn( NM_ELASTIC_MEMORY_CONTROL_ENABLED + " is on. 
" + @@ -138,11 +139,10 @@ private Runnable getDefaultOOMHandler( Configuration conf, Context context, Runnable oomHandlerLocal, boolean controlVirtual) throws YarnException { - Class oomHandlerClass = - conf.getClass( - YarnConfiguration.NM_ELASTIC_MEMORY_CONTROL_OOM_HANDLER, - DefaultOOMHandler.class); if (oomHandlerLocal == null) { + Class oomHandlerClass = conf.getClass( + YarnConfiguration.NM_ELASTIC_MEMORY_CONTROL_OOM_HANDLER, + DefaultOOMHandler.class); try { Constructor constr = oomHandlerClass.getConstructor( Context.class, boolean.class); @@ -284,12 +284,15 @@ public void run() { // This loop can be exited by terminating the process // with stopListening() while ((read = events.read(event)) == event.length) { + if (LOG.isDebugEnabled()) { + LOG.debug("OOM event notification received from oom-listener"); + } // An OOM event has occurred resolveOOM(executor); } if (read != -1) { - LOG.warn(String.format("Characters returned from event hander: %d", + LOG.warn(String.format("Characters returned from event handler: %d", read)); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java index 86137b5..595aa70 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/DefaultOOMHandler.java @@ -181,6 +181,9 @@ public void run() { CGroupsHandler.CGroupController.MEMORY, "", CGROUP_PARAM_MEMORY_OOM_CONTROL); + if 
(LOG.isDebugEnabled()) { + LOG.debug("OOM status read from cgroups: " + status); + } if (!status.contains(CGroupsHandler.UNDER_OOM)) { break; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index c36dfd4..8bd4c47 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -1096,6 +1096,10 @@ public NMAllocationPreemptionPolicy getOverAllocationPreemptionPolicy() { private void setLatestContainersUtilization(ResourceUtilization utilization) { this.latestContainersUtilization = new ContainersResourceUtilization( utilization, Time.now()); + if (LOG.isDebugEnabled()) { + LOG.debug("Updated latest containers resource utilization to " + + latestContainersUtilization.getUtilization()); + } } /** diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java index 188a108..e4665bb 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/SnapshotBasedOverAllocationPreemptionPolicy.java @@ -20,6 +20,8 @@ import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.records.ResourceThresholds; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * An implementation of {@link NMAllocationPreemptionPolicy} based on the @@ -29,6 +31,8 @@ */ public class SnapshotBasedOverAllocationPreemptionPolicy extends NMAllocationPreemptionPolicy { + private static final Logger LOG = LoggerFactory.getLogger( + SnapshotBasedOverAllocationPreemptionPolicy.class); private final int absoluteMemoryPreemptionThresholdMb; private final float cpuPreemptionThreshold; private final int maxTimesCpuOverPreemption; @@ -52,6 +56,10 @@ public ResourceUtilization getResourcesToReclaim() { ResourceUtilization utilization = getContainersMonitor().getContainersUtilization(true).getUtilization(); + if (LOG.isDebugEnabled()) { + LOG.debug("The latest container utilization is " + utilization); + } + int memoryOverLimit = utilization.getPhysicalMemory() - absoluteMemoryPreemptionThresholdMb; float vcoreOverLimit = utilization.getCPU() - cpuPreemptionThreshold; @@ -59,6 +67,10 @@ public ResourceUtilization getResourcesToReclaim() { if (vcoreOverLimit > 0) { timesCpuOverPreemption++; if (timesCpuOverPreemption > maxTimesCpuOverPreemption) { + if (LOG.isDebugEnabled()) { + LOG.debug("CPU utilization is over the preemption threshold " + + timesCpuOverPreemption + " times consecutively."); + } timesCpuOverPreemption = 0; } else 
{ // report no over limit for cpu if # of times CPU is over the preemption