Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java (revision 1552850) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java (working copy) @@ -139,7 +139,7 @@ private final ResourceLocalizationService rsrcLocalizationSrvc; private final ContainersLauncher containersLauncher; private final AuxServices auxiliaryServices; - private final NodeManagerMetrics metrics; + @VisibleForTesting final NodeManagerMetrics metrics; private final NodeStatusUpdater nodeStatusUpdater; Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java (revision 1552850) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java (working copy) @@ -99,6 +99,8 @@ private final List appRsrcs = new ArrayList(); + private long downloadNanos; + public ContainerImpl(Configuration conf, Dispatcher dispatcher, ContainerLaunchContext launchContext, Credentials creds, NodeManagerMetrics metrics, @@ -519,6 +521,12 @@ } } + // duration = end - start; + // record in RequestResourcesTransition: -start + // add in LocalizedTransition: +end + // + container.downloadNanos -= System.nanoTime(); + // Send requests for public, private resources Map cntrRsrc = ctxt.getLocalResources(); if (!cntrRsrc.isEmpty()) { @@ -604,9 +612,17 @@ return ContainerState.LOCALIZING; } container.localizedResources.put(rsrcEvent.getLocation(), syms); + container.metrics.localizationCacheHitMiss(rsrcEvent.getSize()); if (!container.pendingResources.isEmpty()) { return ContainerState.LOCALIZING; } + + // duration = end - start; + // record in RequestResourcesTransition: -start + // add in LocalizedTransition: +end + // + container.downloadNanos += System.nanoTime(); + container.metrics.localizationComplete(container.downloadNanos); container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.LAUNCH_CONTAINER)); Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java (revision 1552850) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java (working copy) @@ -25,6 +25,11 @@ private final Path loc; + // > 0: downloaded + // < 0: cached + // + private long size; + public ContainerResourceLocalizedEvent(ContainerId container, LocalResourceRequest rsrc, Path loc) { super(container, ContainerEventType.RESOURCE_LOCALIZED, rsrc); @@ -35,4 +40,12 @@ return loc; } + public long getSize() { + return size; + } + + public void setSize(long size) { + this.size = size; + } + } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java (revision 1552850) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java (working copy) @@ -237,9 +237,11 @@ rsrc.localPath = locEvent.getLocation(); rsrc.size = locEvent.getSize(); for (ContainerId container : rsrc.ref) { - rsrc.dispatcher.getEventHandler().handle( + final ContainerResourceLocalizedEvent localizedEvent = new ContainerResourceLocalizedEvent( - container, rsrc.rsrc, rsrc.localPath)); + container, rsrc.rsrc, rsrc.localPath); + localizedEvent.setSize(rsrc.size); + rsrc.dispatcher.getEventHandler().handle(localizedEvent); } } } @@ -274,9 +276,11 @@ ResourceRequestEvent reqEvent = (ResourceRequestEvent) event; ContainerId container = reqEvent.getContext().getContainerId(); rsrc.ref.add(container); - rsrc.dispatcher.getEventHandler().handle( + final ContainerResourceLocalizedEvent localizedEvent = new ContainerResourceLocalizedEvent( - container, rsrc.rsrc, rsrc.localPath)); + container, rsrc.rsrc, rsrc.localPath); + localizedEvent.setSize(-rsrc.size); + rsrc.dispatcher.getEventHandler().handle(localizedEvent); } } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java (revision 1552850) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java (working copy) @@ -17,11 +17,13 @@ */ package org.apache.hadoop.yarn.server.nodemanager.metrics; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterInt; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.yarn.api.records.Resource; @@ -41,6 +43,27 @@ MutableGaugeInt allocatedContainers; @Metric MutableGaugeInt availableGB; + @Metric("Missed localization requests in bytes") + MutableCounterLong localizedBytesMissed; + + @Metric("Cached localization requests in bytes") + MutableCounterLong localizedBytesCached; + + @Metric("Localization cache hit ratio (bytes)") + MutableGaugeInt localizedBytesCachedRatio; + + @Metric("Missed localization requests (files)") + MutableCounterLong localizedFilesMissed; + + @Metric("Cached localization requests (files)") + MutableCounterLong localizedFilesCached; + + @Metric("Localization cache hit ratio (files)") + MutableGaugeInt localizedFilesCachedRatio; + + @Metric("Localization time in nanoseconds") + MutableCounterLong localizationDownloadNanos; + public static NodeManagerMetrics create() { return create(DefaultMetricsSystem.instance()); } @@ -99,4 +122,67 @@ public void addResource(Resource res) { availableGB.incr(res.getMemory() / 1024); } + + + private void updateLocalizationHitRatios() { + updateLocalizationHitRatio(localizedBytesCached, localizedBytesMissed, + localizedBytesCachedRatio); + updateLocalizationHitRatio(localizedFilesCached, localizedFilesMissed, + localizedFilesCachedRatio); + } + + private static void updateLocalizationHitRatio(MutableCounterLong hitCounter, + MutableCounterLong missedCounter, MutableGaugeInt ratioGauge) { + final long hits = hitCounter.value(); + final long misses = missedCounter.value(); + final long total = hits + misses; + if (total > 0) { + ratioGauge.set((int)(100 * hits / total)); + } + } + + public void localizationCacheHitMiss(long size) { + if (size > 0) { + localizedBytesMissed.incr(size); + localizedFilesMissed.incr(); + updateLocalizationHitRatios(); + } else if (size < 0) { + // cached: recorded negative, restore the sign + localizedBytesCached.incr(-size); + localizedFilesCached.incr(); + updateLocalizationHitRatios(); + } + } + + public void localizationComplete(long downloadNanos) { + localizationDownloadNanos.incr(downloadNanos); + } + + @VisibleForTesting public long getLocalizedBytesMissed() { + return localizedBytesMissed.value(); + } + + @VisibleForTesting public long getLocalizedBytesCached() { + return localizedBytesCached.value(); + } + + @VisibleForTesting public long getLocalizedFilesMissed() { + return localizedFilesMissed.value(); + } + + @VisibleForTesting public long getLocalizedFilesCached() { + return localizedFilesCached.value(); + } + + @VisibleForTesting public long getLocalizationNanos() { + return localizationDownloadNanos.value(); + } + + @VisibleForTesting public int getLocalizedBytesCachedRatio() { + return localizedBytesCachedRatio.value(); + } + + @VisibleForTesting public int getLocalizedFilesCachedRatio() { + return localizedFilesCachedRatio.value(); + } } Index: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java =================================================================== --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java (revision 1552850) +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java (working copy) @@ -27,6 +27,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -35,6 +36,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; @@ -251,6 +253,50 @@ BufferedReader reader = new BufferedReader(new FileReader(targetFile)); Assert.assertEquals("Hello World!", reader.readLine()); Assert.assertEquals(null, reader.readLine()); + + // + // check the localization counter + // + + // parent to include crc + Assert.assertSame("Unexpected bytes missed metric", + FileUtil.getDU(targetFile.getCanonicalFile().getParentFile()), + containerManager.metrics.getLocalizedBytesMissed()); + Assert.assertSame("Unexpected files missed metric", 1L, + containerManager.metrics.getLocalizedFilesMissed()); + Assert.assertSame("Unexpected bytes cached metric", 0L, + containerManager.metrics.getLocalizedBytesCached()); + Assert.assertSame("Unexpected files cached metric", 0L, + containerManager.metrics.getLocalizedFilesCached()); + Assert.assertTrue("Zero localization download time", + containerManager.metrics.getLocalizationNanos() > 0); + + // test cache being used + final ContainerId cid1 = createContainerId(1); + containerManager.startContainers(StartContainersRequest.newInstance( + Collections.singletonList( + StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(cid1, DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, + context.getContainerTokenSecretManager()))))); + waitForContainerState(containerManager, cid1, ContainerState.COMPLETE); + Assert.assertSame("Unexpected bytes missed metric", + FileUtil.getDU(targetFile.getCanonicalFile().getParentFile()), + containerManager.metrics.getLocalizedBytesMissed()); + Assert.assertSame("cached bytes, missed bytes", + containerManager.metrics.getLocalizedBytesMissed(), + containerManager.metrics.getLocalizedBytesCached()); + Assert.assertSame("cached files, missed files", + containerManager.metrics.getLocalizedFilesMissed(), + containerManager.metrics.getLocalizedFilesCached()); + Assert.assertTrue("Zero localization download time", + containerManager.metrics.getLocalizationNanos() > 0); + Assert.assertSame("50%, cached bytes hit", 50, + containerManager.metrics.getLocalizedBytesCachedRatio()); + Assert.assertSame("50%, cached files hit", 50, + containerManager.metrics.getLocalizedFilesCachedRatio()); } @Test