diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index f2e5138..44d2def 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -157,6 +157,15 @@ HADOOP_YARN_HOME("HADOOP_YARN_HOME"), /** + * $LOCALIZATION_COUNTERS + * + * Since NM does not RPC Container JVM's we pass Localization counter + * vector as an environment variable + * + */ + LOCALIZATION_COUNTERS("LOCALIZATION_COUNTERS"), + + /** * $CONTAINER_ID * Final, exported by NodeManager and non-modifiable by users. */ diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index dd3deb3..85132d6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -139,7 +139,7 @@ private final ResourceLocalizationService rsrcLocalizationSrvc; private final ContainersLauncher containersLauncher; private final AuxServices auxiliaryServices; - private final NodeManagerMetrics metrics; + @VisibleForTesting final NodeManagerMetrics metrics; private final NodeStatusUpdater nodeStatusUpdater; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index e69e61a..5677d61 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -52,4 +52,13 @@ String toString(); + /** + * Vector of localization counters to be passed from NM to application + * container via environment variable {@code $LOCALIZATION_COUNTERS}. See + * {@link org.apache.hadoop.yarn.api.ApplicationConstants.Environment#LOCALIZATION_COUNTERS} + * + * @return coma-separated counter values + */ + String localizationCountersAsString(); + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 486f3ce..1b28e8e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -70,12 +70,23 @@ import org.apache.hadoop.yarn.util.ConverterUtils; public class ContainerImpl implements Container { + private static enum LocalizationCounter { + // 1-to-1 correspondence with MR TaskCounter.LOCALIZED_* + BYTES_MISSED, + BYTES_CACHED, + FILES_MISSED, + FILES_CACHED, + NANOS; + } private final Lock readLock; private final Lock writeLock; private final Dispatcher dispatcher; private final Credentials credentials; private final NodeManagerMetrics metrics; + private final long[] localizationCounts = + new long[LocalizationCounter.values().length]; + private final ContainerLaunchContext launchContext; private final ContainerTokenIdentifier containerTokenIdentifier; private final ContainerId containerId; @@ -519,6 +530,13 @@ public ContainerState transition(ContainerImpl container, } } + // duration = end - start; + // record in RequestResourcesTransition: -start + // add in LocalizedTransition: +end + // + container.localizationCounts[LocalizationCounter.NANOS.ordinal()] + = -System.nanoTime(); + // Send requests for public, private resources Map cntrRsrc = ctxt.getLocalResources(); if (!cntrRsrc.isEmpty()) { @@ -604,9 +622,33 @@ public ContainerState transition(ContainerImpl container, return ContainerState.LOCALIZING; } container.localizedResources.put(rsrcEvent.getLocation(), syms); + + final long localizedSize = rsrcEvent.getSize(); + if (localizedSize > 0) { + container.localizationCounts + [LocalizationCounter.BYTES_MISSED.ordinal()] += localizedSize; + container.localizationCounts + [LocalizationCounter.FILES_MISSED.ordinal()]++; + } else if (localizedSize < 0) { + // cached: recorded negative, restore the sign + container.localizationCounts + [LocalizationCounter.BYTES_CACHED.ordinal()] -= localizedSize; + container.localizationCounts + [LocalizationCounter.FILES_CACHED.ordinal()]++; + } + container.metrics.localizationCacheHitMiss(localizedSize); if (!container.pendingResources.isEmpty()) { return ContainerState.LOCALIZING; } + + // duration = end - start; + // record in RequestResourcesTransition: -start + // add in LocalizedTransition: +end + // + container.localizationCounts[LocalizationCounter.NANOS.ordinal()] + += System.nanoTime(); + container.metrics.localizationComplete( + container.localizationCounts[LocalizationCounter.NANOS.ordinal()]); container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.LAUNCH_CONTAINER)); @@ -893,4 +935,14 @@ public String toString() { this.readLock.unlock(); } } + + @Override + public String localizationCountersAsString() { + StringBuilder result = + new StringBuilder(String.valueOf(localizationCounts[0])); + for (int i = 1; i < localizationCounts.length; i++) { + result.append(',').append(localizationCounts[i]); + } + return result.toString(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java index 4b742b1..d5bcaa2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResourceLocalizedEvent.java @@ -25,6 +25,11 @@ private final Path loc; + // > 0: downloaded + // < 0: cached + // + private long size; + public ContainerResourceLocalizedEvent(ContainerId container, LocalResourceRequest rsrc, Path loc) { super(container, ContainerEventType.RESOURCE_LOCALIZED, rsrc); @@ -35,4 +40,12 @@ public Path getLocation() { return loc; } + public long getSize() { + return size; + } + + public void setSize(long size) { + this.size = size; + } + } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 8b08965..3e4c85c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -625,6 +625,9 @@ public void sanitizeEnv(Map environment, Path pwd, ); environment.put(Environment.PWD.name(), pwd.toString()); + + environment.put(Environment.LOCALIZATION_COUNTERS.name(), + container.localizationCountersAsString()); putEnvIfNotNull(environment, Environment.HADOOP_CONF_DIR.name(), diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java index ed110b0..faaf053 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java @@ -237,9 +237,11 @@ public void transition(LocalizedResource rsrc, ResourceEvent event) { rsrc.localPath = locEvent.getLocation(); rsrc.size = locEvent.getSize(); for (ContainerId container : rsrc.ref) { - rsrc.dispatcher.getEventHandler().handle( + final ContainerResourceLocalizedEvent localizedEvent = new ContainerResourceLocalizedEvent( - container, rsrc.rsrc, rsrc.localPath)); + container, rsrc.rsrc, rsrc.localPath); + localizedEvent.setSize(rsrc.size); + rsrc.dispatcher.getEventHandler().handle(localizedEvent); } } } @@ -274,9 +276,11 @@ public void transition(LocalizedResource rsrc, ResourceEvent event) { ResourceRequestEvent reqEvent = (ResourceRequestEvent) event; ContainerId container = reqEvent.getContext().getContainerId(); rsrc.ref.add(container); - rsrc.dispatcher.getEventHandler().handle( + final ContainerResourceLocalizedEvent localizedEvent = new ContainerResourceLocalizedEvent( - container, rsrc.rsrc, rsrc.localPath)); + container, rsrc.rsrc, rsrc.localPath); + localizedEvent.setSize(-rsrc.size); + rsrc.dispatcher.getEventHandler().handle(localizedEvent); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 4d62247..47646b7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -17,11 +17,13 @@ */ package org.apache.hadoop.yarn.server.nodemanager.metrics; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterInt; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.yarn.api.records.Resource; @@ -41,6 +43,27 @@ MutableGaugeInt allocatedContainers; @Metric MutableGaugeInt availableGB; + @Metric("Missed localization requests in bytes") + MutableCounterLong localizedBytesMissed; + + @Metric("Cached localization requests in bytes") + MutableCounterLong localizedBytesCached; + + @Metric("Localization cache hit ratio (bytes)") + MutableGaugeInt localizedBytesCachedRatio; + + @Metric("Missed localization requests (files)") + MutableCounterLong localizedFilesMissed; + + @Metric("Cached localization requests (files)") + MutableCounterLong localizedFilesCached; + + @Metric("Localization cache hit ratio (files)") + MutableGaugeInt localizedFilesCachedRatio; + + @Metric("Localization time in nanoseconds") + MutableCounterLong localizationDownloadNanos; + public static NodeManagerMetrics create() { return create(DefaultMetricsSystem.instance()); } @@ -99,4 +122,67 @@ public void releaseContainer(Resource res) { public void addResource(Resource res) { availableGB.incr(res.getMemory() / 1024); } + + + private void updateLocalizationHitRatios() { + updateLocalizationHitRatio(localizedBytesCached, localizedBytesMissed, + localizedBytesCachedRatio); + updateLocalizationHitRatio(localizedFilesCached, localizedFilesMissed, + localizedFilesCachedRatio); + } + + private static void updateLocalizationHitRatio(MutableCounterLong hitCounter, + MutableCounterLong missedCounter, MutableGaugeInt ratioGauge) { + final long hits = hitCounter.value(); + final long misses = missedCounter.value(); + final long total = hits + misses; + if (total > 0) { + ratioGauge.set((int)(100 * hits / total)); + } + } + + public void localizationCacheHitMiss(long size) { + if (size > 0) { + localizedBytesMissed.incr(size); + localizedFilesMissed.incr(); + updateLocalizationHitRatios(); + } else if (size < 0) { + // cached: recorded negative, restore the sign + localizedBytesCached.incr(-size); + localizedFilesCached.incr(); + updateLocalizationHitRatios(); + } + } + + public void localizationComplete(long downloadNanos) { + localizationDownloadNanos.incr(downloadNanos); + } + + @VisibleForTesting public long getLocalizedBytesMissed() { + return localizedBytesMissed.value(); + } + + @VisibleForTesting public long getLocalizedBytesCached() { + return localizedBytesCached.value(); + } + + @VisibleForTesting public long getLocalizedFilesMissed() { + return localizedFilesMissed.value(); + } + + @VisibleForTesting public long getLocalizedFilesCached() { + return localizedFilesCached.value(); + } + + @VisibleForTesting public long getLocalizationNanos() { + return localizationDownloadNanos.value(); + } + + @VisibleForTesting public int getLocalizedBytesCachedRatio() { + return localizedBytesCachedRatio.value(); + } + + @VisibleForTesting public int getLocalizedFilesCachedRatio() { + return localizedFilesCachedRatio.value(); + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index f62cd50..73fe9c0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -27,6 +27,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -35,6 +36,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; @@ -251,6 +253,50 @@ public void testContainerSetup() throws Exception { BufferedReader reader = new BufferedReader(new FileReader(targetFile)); Assert.assertEquals("Hello World!", reader.readLine()); Assert.assertEquals(null, reader.readLine()); + + // + // check the localization counter + // + + // parent to include crc + Assert.assertSame("Unexpected bytes missed metric", + FileUtil.getDU(targetFile.getCanonicalFile().getParentFile()), + containerManager.metrics.getLocalizedBytesMissed()); + Assert.assertSame("Unexpected files missed metric", 1L, + containerManager.metrics.getLocalizedFilesMissed()); + Assert.assertSame("Unexpected bytes cached metric", 0L, + containerManager.metrics.getLocalizedBytesCached()); + Assert.assertSame("Unexpected files cached metric", 0L, + containerManager.metrics.getLocalizedFilesCached()); + Assert.assertTrue("Zero localization download time", + containerManager.metrics.getLocalizationNanos() > 0); + + // test cache being used + final ContainerId cid1 = createContainerId(1); + containerManager.startContainers(StartContainersRequest.newInstance( + Collections.singletonList( + StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(cid1, DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, + context.getContainerTokenSecretManager()))))); + waitForContainerState(containerManager, cid1, ContainerState.COMPLETE); + Assert.assertSame("Unexpected bytes missed metric", + FileUtil.getDU(targetFile.getCanonicalFile().getParentFile()), + containerManager.metrics.getLocalizedBytesMissed()); + Assert.assertSame("cached bytes, missed bytes", + containerManager.metrics.getLocalizedBytesMissed(), + containerManager.metrics.getLocalizedBytesCached()); + Assert.assertSame("cached files, missed files", + containerManager.metrics.getLocalizedFilesMissed(), + containerManager.metrics.getLocalizedFilesCached()); + Assert.assertTrue("Zero localization download time", + containerManager.metrics.getLocalizationNanos() > 0); + Assert.assertSame("50%, cached bytes hit", 50, + containerManager.metrics.getLocalizedBytesCachedRatio()); + Assert.assertSame("50%, cached files hit", 50, + containerManager.metrics.getLocalizedFilesCachedRatio()); } @Test diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index a021214..55ba4e5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -117,6 +117,11 @@ public String toString() { } @Override + public String localizationCountersAsString() { + return null; + } + + @Override public void handle(ContainerEvent event) { }