GpuDiscoverer: simplify resolution of the GPU discovery binary path

Summary of what the hunks below change:

* YarnConfiguration: remove the @Private constant
  DEFAULT_NM_GPU_PATH_TO_EXEC (an empty string).
* GpuDiscoverer.initialize(): pass DEFAULT_BINARY_NAME as the default value
  to conf.get(NM_GPU_PATH_TO_EXEC, ...) instead of the removed constant.
* GpuDiscoverer.initialize(): move the search over
  DEFAULT_BINARY_SEARCH_DIRS into the new private helper
  lookupBinaryInDefaultDirs(). The helper returns the first existing
  candidate, or logs a warning listing every path it tried and returns null.
* GpuDiscoverer.initialize(): assign pathOfGpuBinary exactly once, after the
  path has been resolved. Previously it was left unassigned when the binary
  was found neither at the configured path nor in the default directories.
* GpuDiscoverer.initialize(): prefix the logged device information with
  "Discovered GPU information: ".

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 2f2528445d3..37f4789d99c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1648,9 +1648,6 @@ public static boolean isAclEnabled(Configuration conf) {
   public static final String NM_GPU_PATH_TO_EXEC =
       NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables";
 
-  @Private
-  public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
-
   /**
    * Settings to control which implementation of docker plugin for GPU will be
    * used.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 6e3cf1315ce..9e79bc2d0d5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -19,8 +19,8 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -198,48 +198,32 @@ public synchronized void initialize(Configuration conf) throws YarnException {
     this.conf = conf;
     numOfErrorExecutionSinceLastSucceed = 0;
     String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
-        YarnConfiguration.DEFAULT_NM_GPU_PATH_TO_EXEC);
+        DEFAULT_BINARY_NAME);
     if (pathToExecutable.isEmpty()) {
       pathToExecutable = DEFAULT_BINARY_NAME;
     }
 
-    // Validate file existence
     File binaryPath = new File(pathToExecutable);
 
     if (!binaryPath.exists()) {
-      // When binary not exist, use default setting.
-      boolean found = false;
-      for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
-        binaryPath = new File(dir, DEFAULT_BINARY_NAME);
-        if (binaryPath.exists()) {
-          found = true;
-          pathOfGpuBinary = binaryPath.getAbsolutePath();
-          break;
-        }
-      }
-
-      if (!found) {
-        LOG.warn("Failed to locate binary at:" + binaryPath.getAbsolutePath()
-            + ", please double check [" + YarnConfiguration.NM_GPU_PATH_TO_EXEC
-            + "] setting. Now use " + "default binary:" + DEFAULT_BINARY_NAME);
+      final File lookedUpBinary = lookupBinaryInDefaultDirs();
+      if (lookedUpBinary != null) {
+        binaryPath = lookedUpBinary;
       }
-    } else{
-      // If path specified by user is a directory, use
-      if (binaryPath.isDirectory()) {
-        binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME);
-        LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
-            + " under the directory, updated path-to-executable:" + binaryPath
-            .getAbsolutePath());
-      }
-      // Validated
-      pathOfGpuBinary = binaryPath.getAbsolutePath();
+    } else if (binaryPath.isDirectory()) {
+      binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME);
+      LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
+          + " under the directory, updated path-to-executable:"
+          + binaryPath.getAbsolutePath());
     }
 
+    pathOfGpuBinary = binaryPath.getAbsolutePath();
+
     // Try to discover GPU information once and print
     try {
       LOG.info("Trying to discover GPU information ...");
       GpuDeviceInformation info = getGpuDeviceInformation();
-      LOG.info(info.toString());
+      LOG.info("Discovered GPU information: " + info.toString());
     } catch (YarnException e) {
       String msg =
           "Failed to discover GPU information from system, exception message:"
@@ -248,6 +232,24 @@ public synchronized void initialize(Configuration conf) throws YarnException {
     }
   }
 
+  private File lookupBinaryInDefaultDirs() {
+    Set<String> triedBinaryPaths = Sets.newHashSet();
+    for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
+      File binaryPath = new File(dir, DEFAULT_BINARY_NAME);
+      if (binaryPath.exists()) {
+        return binaryPath;
+      } else {
+        triedBinaryPaths.add(binaryPath.getAbsolutePath());
+      }
+    }
+    LOG.warn("Failed to locate GPU device discovery binary, tried paths: "
+        + triedBinaryPaths + "! Please double check the value of config "
+        + YarnConfiguration.NM_GPU_PATH_TO_EXEC +
+        ". Using default binary: " + DEFAULT_BINARY_NAME);
+
+    return null;
+  }
+
   @VisibleForTesting
   protected Map getEnvironmentToRunCommand() {
     return environment;