diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index cf3d8e7..4ec37a7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -93,6 +93,7 @@ private int exitCode = ContainerExitStatus.INVALID; private final StringBuilder diagnostics; private boolean wasLaunched; + private long containerLocalizationStartTime; private long containerLaunchStartTime; private static Clock clock = new SystemClock(); @@ -489,16 +490,21 @@ private void sendLaunchEvent() { // resource usage. 
@SuppressWarnings("unchecked") // dispatcher not typed private void sendContainerMonitorStartEvent() { - long pmemBytes = getResource().getMemory() * 1024 * 1024L; - float pmemRatio = daemonConf.getFloat( - YarnConfiguration.NM_VMEM_PMEM_RATIO, - YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); - long vmemBytes = (long) (pmemRatio * pmemBytes); - int cpuVcores = getResource().getVirtualCores(); - - dispatcher.getEventHandler().handle( - new ContainerStartMonitoringEvent(containerId, - vmemBytes, pmemBytes, cpuVcores)); + long launchDuration = clock.getTime() - containerLaunchStartTime; + metrics.addContainerLaunchDuration(launchDuration); + // Multiply by 1024L first so the MB-to-bytes product is never done in int + long pmemBytes = getResource().getMemory() * 1024L * 1024L; + float pmemRatio = daemonConf.getFloat( + YarnConfiguration.NM_VMEM_PMEM_RATIO, + YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); + long vmemBytes = (long) (pmemRatio * pmemBytes); + int cpuVcores = getResource().getVirtualCores(); + // Localization ran from containerLocalizationStartTime until launch began + long localizationDuration = + containerLaunchStartTime - containerLocalizationStartTime; + dispatcher.getEventHandler().handle(new ContainerStartMonitoringEvent( + containerId, vmemBytes, pmemBytes, cpuVcores, + launchDuration, localizationDuration)); } private void addDiagnostics(String... 
diags) { @@ -597,6 +603,7 @@ public ContainerState transition(ContainerImpl container, } } + container.containerLocalizationStartTime = clock.getTime(); // Send requests for public, private resources Map cntrRsrc = ctxt.getLocalResources(); if (!cntrRsrc.isEmpty()) { @@ -748,8 +755,6 @@ public void transition(ContainerImpl container, ContainerEvent event) { container.sendContainerMonitorStartEvent(); container.metrics.runningContainer(); container.wasLaunched = true; - long duration = clock.getTime() - container.containerLaunchStartTime; - container.metrics.addContainerLaunchDuration(duration); if (container.recoveredAsKilled) { LOG.info("Killing " + container.containerId diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java index 1375da8..367c916 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java @@ -28,6 +28,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableStat; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -46,6 +47,9 @@ public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs"; public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit"; public static final 
String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs"; + public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs"; + public static final String LOCALIZATION_DURATION_METRIC_NAME = + "localizationDurationMs"; private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent"; // Use a multiplier of 1000 to avoid losing too much precision when @@ -74,6 +78,12 @@ @Metric public MutableGaugeInt cpuVcoreLimit; + @Metric + public MutableGaugeLong launchDurationMs; + + @Metric + public MutableGaugeLong localizationDurationMs; + static final MetricsInfo RECORD_INFO = info("ContainerResource", "Resource limit and usage by container"); @@ -122,6 +132,10 @@ VMEM_LIMIT_METRIC_NAME, "Virtual memory limit in MBs", 0); this.cpuVcoreLimit = registry.newGauge( VCORE_LIMIT_METRIC_NAME, "CPU limit in number of vcores", 0); + this.launchDurationMs = registry.newGauge( + LAUNCH_DURATION_METRIC_NAME, "Launch duration in MS", 0L); + this.localizationDurationMs = registry.newGauge( + LOCALIZATION_DURATION_METRIC_NAME, "Localization duration in MS", 0L); } ContainerMetrics tag(MetricsInfo info, ContainerId containerId) { @@ -207,6 +221,11 @@ public void recordResourceLimit(int vmemLimit, int pmemLimit, int cpuVcores) { this.cpuVcoreLimit.set(cpuVcores); } + public void recordTime(long launchDuration, long localizationDuration) { + this.launchDurationMs.set(launchDuration); + this.localizationDurationMs.set(localizationDuration); + } + private synchronized void scheduleTimerTaskIfRequired() { if (flushPeriodMs > 0) { // Lazily initialize timer diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java index 56e2d8e..c09bebf 
100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java @@ -25,13 +25,18 @@ private final long vmemLimit; private final long pmemLimit; private final int cpuVcores; + private final long launchDuration; + private final long localizationDuration; public ContainerStartMonitoringEvent(ContainerId containerId, - long vmemLimit, long pmemLimit, int cpuVcores) { + long vmemLimit, long pmemLimit, int cpuVcores, long launchDuration, + long localizationDuration) { super(containerId, ContainersMonitorEventType.START_MONITORING_CONTAINER); this.vmemLimit = vmemLimit; this.pmemLimit = pmemLimit; this.cpuVcores = cpuVcores; + this.launchDuration = launchDuration; + this.localizationDuration = localizationDuration; } public long getVmemLimit() { @@ -45,4 +50,12 @@ public long getPmemLimit() { public int getCpuVcores() { return this.cpuVcores; } + + public long getLaunchDuration() { + return this.launchDuration; + } + + public long getLocalizationDuration() { + return this.localizationDuration; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index b587e46..adafc88 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -225,16 +225,20 @@ protected void serviceStop() throws Exception { private long vmemLimit; private long pmemLimit; private int cpuVcores; + private final long launchDuration; + private final long localizationDuration; public ProcessTreeInfo(ContainerId containerId, String pid, ResourceCalculatorProcessTree pTree, long vmemLimit, long pmemLimit, - int cpuVcores) { + int cpuVcores, long launchDuration, long localizationDuration) { this.containerId = containerId; this.pid = pid; this.pTree = pTree; this.vmemLimit = vmemLimit; this.pmemLimit = pmemLimit; this.cpuVcores = cpuVcores; + this.launchDuration = launchDuration; + this.localizationDuration = localizationDuration; } public ContainerId getContainerId() { @@ -275,6 +279,20 @@ public long getPmemLimit() { public int getCpuVcores() { return this.cpuVcores; } + + /** + * @return launch duration of the container, in milliseconds + */ + public long getLaunchDuration() { + return this.launchDuration; + } + + /** + * @return localization duration of the container, in milliseconds + */ + public long getLocalizationDuration() { + return this.localizationDuration; + } } @@ -424,6 +442,8 @@ public void run() { usageMetrics.recordResourceLimit( vmemLimit, pmemLimit, cpuVcores); usageMetrics.recordProcessId(pId); + usageMetrics.recordTime(ptInfo.getLaunchDuration(), + ptInfo.getLocalizationDuration()); } } } @@ -618,7 +638,8 @@ public void handle(ContainersMonitorEvent monitoringEvent) { ProcessTreeInfo processTreeInfo = new ProcessTreeInfo(containerId, null, null, startEvent.getVmemLimit(), startEvent.getPmemLimit(), - startEvent.getCpuVcores()); + startEvent.getCpuVcores(), startEvent.getLaunchDuration(), + startEvent.getLocalizationDuration()); this.containersToBeAdded.put(containerId, processTreeInfo); } break; diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java index c628648..e0622e8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainerMetrics.java @@ -87,10 +87,13 @@ public void testContainerMetricsLimit() throws InterruptedException { int anyPmemLimit = 1024; int anyVmemLimit = 2048; int anyVcores = 10; + long anyLaunchDuration = 20L; + long anyLocalizationDuration = 1000L; String anyProcessId = "1234"; metrics.recordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores); metrics.recordProcessId(anyProcessId); + metrics.recordTime(anyLaunchDuration, anyLocalizationDuration); Thread.sleep(110); metrics.getMetrics(collector, true); @@ -105,6 +108,12 @@ public void testContainerMetricsLimit() throws InterruptedException { MetricsRecords.assertMetric(record, ContainerMetrics.VMEM_LIMIT_METRIC_NAME, anyVmemLimit); MetricsRecords.assertMetric(record, ContainerMetrics.VCORE_LIMIT_METRIC_NAME, anyVcores); + MetricsRecords.assertMetric(record, + ContainerMetrics.LAUNCH_DURATION_METRIC_NAME, anyLaunchDuration); + MetricsRecords.assertMetric(record, + ContainerMetrics.LOCALIZATION_DURATION_METRIC_NAME, + anyLocalizationDuration); + collector.clear(); } }