Record container start time, finish time and exit code in ContainerMetrics.

What this patch does (text before the first "diff --git" line is ignored by
patch tools):
  * ContainerMetrics gains three gauges (startTime, finishTime, exitCode) and
    the setters recordStartTime / recordFinishTimeAndExitCode.
  * ContainerImpl's constructor, when NM_CONTAINER_METRICS_ENABLE is true,
    obtains the per-container ContainerMetrics and records the start time.
  * The transition that calls metrics.releaseContainer / sendFinishedEvents
    records the finish time and exit code, then calls finished() on the
    container metrics. It is a no-op when container metrics are disabled
    (the field stays null).
  * TestContainer.testKillOnNew checks the three new gauges.

Review fixes folded in:
  * finishTime is asserted ">=" startTime (was ">"): both come from
    clock.getTime() and may fall in the same millisecond.
  * Local variable "unrgisterDelay" renamed to "unregisterDelay".
  * Dropped the unused MutableCounterInt / MutableCounterLong imports.
  * Dropped the hunk that only inserted a blank line before "try {".
  * Removed the stray space in "!= null )".

NOTE(review): the original text had lost its line breaks. Blank context lines
and indentation below were reconstructed from the hunk line counts and the
usual two-space indentation -- confirm against the tree, or apply with
whitespace-tolerant options. The "index" blob hashes are carried over
unchanged and no longer match the edited content -- regenerate if needed.

diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index da8a3a6..6dc5720 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -65,6 +65,7 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.sharedcache.SharedCacheUploadEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.sharedcache.SharedCacheUploadEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStopMonitoringEvent;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
@@ -100,6 +101,7 @@
   private boolean wasLaunched;
   private long containerLocalizationStartTime;
   private long containerLaunchStartTime;
+  private ContainerMetrics containerMetrics;
   private static Clock clock = SystemClock.getInstance();
 
   /** The NM-wide configuration - not specific to this container */
@@ -147,6 +149,21 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
     this.readLock = readWriteLock.readLock();
     this.writeLock = readWriteLock.writeLock();
     this.context = context;
+    boolean containerMetricsEnabled =
+        conf.getBoolean(YarnConfiguration.NM_CONTAINER_METRICS_ENABLE,
+            YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_ENABLE);
+
+    if (containerMetricsEnabled) {
+      long flushPeriod =
+          conf.getLong(YarnConfiguration.NM_CONTAINER_METRICS_PERIOD_MS,
+              YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_PERIOD_MS);
+      long unregisterDelay = conf.getLong(
+          YarnConfiguration.NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS,
+          YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS);
+      containerMetrics = ContainerMetrics
+          .forContainer(containerId, flushPeriod, unregisterDelay);
+      containerMetrics.recordStartTime(clock.getTime());
+    }
 
     stateMachine = stateMachineFactory.make(this);
   }
@@ -989,6 +1006,11 @@ public void transition(ContainerImpl container, ContainerEvent event) {
     @SuppressWarnings("unchecked")
     public void transition(ContainerImpl container, ContainerEvent event) {
       container.metrics.releaseContainer(container.resource);
+      if (container.containerMetrics != null) {
+        container.containerMetrics
+            .recordFinishTimeAndExitCode(clock.getTime(), container.exitCode);
+        container.containerMetrics.finished();
+      }
       container.sendFinishedEvents();
       //if the current state is NEW it means the CONTAINER_INIT was never
       // sent for the event, thus no need to send the CONTAINER_STOP
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
index 9d17db0..7f3afe0 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java
@@ -100,6 +100,15 @@
   @Metric
   public MutableGaugeLong localizationDurationMs;
 
+  @Metric
+  public MutableGaugeLong startTime;
+
+  @Metric
+  public MutableGaugeLong finishTime;
+
+  @Metric
+  public MutableGaugeInt exitCode;
+
   static final MetricsInfo RECORD_INFO =
       info("ContainerResource", "Resource limit and usage by container");
 
@@ -277,6 +286,15 @@ public void recordStateChangeDurations(long launchDuration,
     this.localizationDurationMs.set(localizationDuration);
   }
 
+  public void recordStartTime(long startTime) {
+    this.startTime.set(startTime);
+  }
+
+  public void recordFinishTimeAndExitCode(long finishTime, int exitCode) {
+    this.finishTime.set(finishTime);
+    this.exitCode.set(exitCode);
+  }
+
   private synchronized void scheduleTimerTaskIfRequired() {
     if (flushPeriodMs > 0) {
       // Lazily initialize timer
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
index 3e06236..cc98bdc 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
@@ -85,6 +85,7 @@
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
@@ -345,6 +346,15 @@ public void testKillOnNew() throws Exception {
       assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
           .contains("KillRequest"));
       assertEquals(killed + 1, metrics.getKilledContainers());
+      // Check that the container metrics were recorded.
+      ContainerMetrics containerMetrics =
+          ContainerMetrics.forContainer(wc.cId, 1, 5000);
+      Assert.assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
+          containerMetrics.exitCode.value());
+      Assert.assertTrue(containerMetrics.startTime.value() > 0);
+      Assert.assertTrue(
+          containerMetrics.finishTime.value() >= containerMetrics.startTime
+              .value());
     } finally {
       if (wc != null) {
         wc.finished();