diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 005fa71..e4190be 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -17,9 +17,9 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -35,6 +35,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; @@ -52,6 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerChangeResourceEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; @@ -472,6 +474,7 @@ public Resource getCurrentConsumption() { return attemptResourceUsage.getUsed(); } + @SuppressWarnings("unchecked") private Container updateContainerAndNMToken(RMContainer rmContainer, boolean newContainer, boolean increasedContainer) { Container container = rmContainer.getContainer(); @@ -495,6 +498,16 @@ private Container updateContainerAndNMToken(RMContainer rmContainer, updatedNMTokens.add(nmToken); } } catch (IllegalArgumentException e) { + if (e.getCause() instanceof UnknownHostException) { + // Failing application attempt so that attempt could get assigned to + // another NM + rmContext + .getDispatcher() + .getEventHandler() + .handle( + new RMAppAttemptFailedEvent(attemptId, StringUtils + .stringifyException(e))); + } // DNS might be down, skip returning this container. LOG.error("Error trying to assign container token and NM token to" + " an updated container " + container.getId(), e);