commit 762508d670d9000416f01642735b461802fa63bf Author: Vinod Kumar Vavilapalli Date: Thu Feb 20 14:48:03 2014 -0800 YARN-1398. CS deadlock diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 968d373..5958eec 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -50,7 +50,6 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; -import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; @@ -1410,12 +1409,14 @@ public void completedContainer(Resource clusterResource, FiCaSchedulerApp application, FiCaSchedulerNode node, RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event, CSQueue childQueue) { if (application != null) { + + boolean removed = false; + // Careful! Locking order is important! synchronized (this) { Container container = rmContainer.getContainer(); - boolean removed = false; // Inform the application & the node // Note: It's safe to assume that all state changes to RMContainer // happen under scheduler's lock... @@ -1441,13 +1442,14 @@ public void completedContainer(Resource clusterResource, " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + usedResources + " cluster=" + clusterResource); - // Inform the parent queue - getParent().completedContainer(clusterResource, application, - node, rmContainer, null, event, this); } } - + if (removed) { + // Inform the parent queue _outside_ of the leaf-queue lock + getParent().completedContainer(clusterResource, application, node, + rmContainer, null, event, this); + } } }