diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java
index 021863b..d975fd0 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java
@@ -68,15 +68,6 @@ public void testHalfOpportunisticMaps() throws Exception {
doTest(4, 1, 1, 2);
}
- /**
- * Test will run with 6 Maps and 2 Reducers. All the Maps are OPPORTUNISTIC.
- * @throws Exception
- */
- @Test
- public void testMultipleReducers() throws Exception {
- doTest(6, 2, 1, 6);
- }
-
public void doTest(int numMappers, int numReducers, int numNodes,
int percent) throws Exception {
doTest(numMappers, numReducers, numNodes, 1000, percent);
@@ -94,7 +85,8 @@ public void doTest(int numMappers, int numReducers, int numNodes,
conf.setBoolean(YarnConfiguration.
OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true);
conf.setBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, true);
- conf.setBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED, true);
+ conf.setInt(
+ YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10);
dfsCluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(numNodes).build();
fileSystem = dfsCluster.getFileSystem();
@@ -104,11 +96,7 @@ public void doTest(int numMappers, int numReducers, int numNodes,
createInput(fileSystem, numMappers, numLines);
// Run the test.
- Configuration jobConf = mrCluster.getConfig();
- jobConf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS,
- YarnConfiguration.DEFAULT_AMRM_PROXY_ADDRESS);
-
- runMergeTest(new JobConf(jobConf), fileSystem,
+ runMergeTest(new JobConf(conf), fileSystem,
numMappers, numReducers, numLines, percent);
} finally {
if (dfsCluster != null) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java
index f88fa3b..0207010 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java
@@ -72,4 +72,10 @@
*/
public static final int KILLED_AFTER_APP_COMPLETION = -107;
+ /**
+ * Container was terminated by the ContainerScheduler to make room
+ * for another container.
+ */
+ public static final int KILLED_BY_CONTAINER_SCHEDULER = -108;
+
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java
index 582389f..4efd8c1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java
@@ -36,6 +36,6 @@
/** Completed container */
COMPLETE,
- /** Queued at the NM. */
- QUEUED
+ /** Scheduled (awaiting resources) at the NM. */
+ SCHEDULED
}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 1a30c32..d080554 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -824,10 +824,11 @@ public static boolean isAclEnabled(Configuration conf) {
/** Prefix for all node manager configs.*/
public static final String NM_PREFIX = "yarn.nodemanager.";
- /** Enable Queuing of OPPORTUNISTIC containers. */
- public static final String NM_CONTAINER_QUEUING_ENABLED = NM_PREFIX
- + "container-queuing-enabled";
- public static final boolean NM_CONTAINER_QUEUING_ENABLED_DEFAULT = false;
+ /** Max Queue length of OPPORTUNISTIC containers on the NM. */
+ public static final String NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH =
+ NM_PREFIX + "opportunistic-containers-max-queue-length";
+ public static final int NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH_DEFAULT =
+ 0;
/** Environment variables that will be sent to containers.*/
public static final String NM_ADMIN_USER_ENV = NM_PREFIX + "admin-env";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
index 9c746fd..be8ffbd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
@@ -82,7 +82,7 @@ enum ContainerStateProto {
C_NEW = 1;
C_RUNNING = 2;
C_COMPLETE = 3;
- C_QUEUED = 4;
+ C_SCHEDULED = 4;
}
message ContainerProto {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java
index 4cfc4eb..b552d19 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java
@@ -108,7 +108,8 @@ public void doBefore() throws Exception {
conf.setBoolean(YarnConfiguration.
OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true);
conf.setBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, true);
- conf.setBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED, true);
+ conf.setInt(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH,
+ 10);
cluster.init(conf);
cluster.start();
yarnConf = cluster.getConfig();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
index 3640883..d211d6d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java
@@ -36,6 +36,7 @@
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.Service.STATE;
+import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -330,6 +331,12 @@ private void testContainerManagement(NMClientImpl nmClient,
ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
ContainerLaunchContext clc =
Records.newRecord(ContainerLaunchContext.class);
+ if (Shell.WINDOWS) {
+ clc.setCommands(
+ Arrays.asList("ping", "-n", "100", "127.0.0.1", ">nul"));
+ } else {
+ clc.setCommands(Arrays.asList("sleep", "10"));
+ }
clc.setTokens(securityTokens);
try {
nmClient.startContainer(container, clc);
@@ -415,7 +422,7 @@ private void testIncreaseContainerResource(Container container)
try {
nmClient.increaseContainerResource(container);
} catch (YarnException e) {
- // NM container will only be in LOCALIZED state, so expect the increase
+ // NM container will only be in SCHEDULED state, so expect the increase
// action to fail.
if (!e.getMessage().contains(
"can only be changed when a container is in RUNNING state")) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java
index b9b4b02..1e7f1cd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java
@@ -111,6 +111,8 @@ public static void setup() throws Exception {
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
conf.setBoolean(
YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true);
+ conf.setInt(
+ YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10);
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
yarnCluster =
new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 72e026d..ec072e6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -987,10 +987,10 @@
- Enable Queuing of OPPORTUNISTIC containers on the
+ Max number of OPPORTUNISTIC containers to queue at the
nodemanager.
- yarn.nodemanager.container-queuing-enabled
- false
+ yarn.nodemanager.opportunistic-containers-max-queue-length
+ 0
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index e888393..16a8497 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -27,12 +27,12 @@
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
@@ -47,15 +47,6 @@
public interface Context {
/**
- * Interface exposing methods related to the queuing of containers in the NM.
- */
- interface QueuingContext {
- ConcurrentMap getQueuedContainers();
-
- ConcurrentMap getKilledQueuedContainers();
- }
-
- /**
* Return the nodeId. Usable only when the ContainerManager is started.
*
* @return the NodeId
@@ -112,13 +103,6 @@
NodeStatusUpdater getNodeStatusUpdater();
- /**
- * Returns a QueuingContext that provides information about the
- * number of Containers Queued as well as the number of Containers that were
- * queued and killed.
- */
- QueuingContext getQueuingContext();
-
boolean isDistributedSchedulingEnabled();
OpportunisticContainerAllocator getContainerAllocator();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 37f67c4..052e53b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -56,7 +56,6 @@
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
@@ -64,7 +63,7 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing.QueuingContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
@@ -111,6 +110,8 @@
private boolean rmWorkPreservingRestartEnabled;
private boolean shouldExitOnShutdownEvent = false;
+ private ContainerScheduler containerScheduler;
+
public NodeManager() {
super(NodeManager.class.getName());
}
@@ -177,14 +178,8 @@ protected ContainerManagerImpl createContainerManager(Context context,
ContainerExecutor exec, DeletionService del,
NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager,
LocalDirsHandlerService dirsHandler) {
- if (getConfig().getBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED,
- YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED_DEFAULT)) {
- return new QueuingContainerManagerImpl(context, exec, del,
- nodeStatusUpdater, metrics, dirsHandler);
- } else {
- return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
- metrics, dirsHandler);
- }
+ return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
+ metrics, dirsHandler);
}
protected NMCollectorService createNMCollectorService(Context ctxt) {
@@ -510,7 +505,6 @@ public void run() {
private OpportunisticContainerAllocator containerAllocator;
- private final QueuingContext queuingContext;
private ContainerExecutor executor;
private NMTimelinePublisher nmTimelinePublisher;
@@ -533,7 +527,6 @@ public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
this.stateStore = stateStore;
this.logAggregationReportForApps = new ConcurrentLinkedQueue<
LogAggregationReport>();
- this.queuingContext = new QueuingNMContext();
this.isDistSchedulingEnabled = isDistSchedulingEnabled;
this.conf = conf;
}
@@ -662,11 +655,6 @@ public void setNodeStatusUpdater(NodeStatusUpdater nodeStatusUpdater) {
this.nodeStatusUpdater = nodeStatusUpdater;
}
- @Override
- public QueuingContext getQueuingContext() {
- return this.queuingContext;
- }
-
public boolean isDistributedSchedulingEnabled() {
return isDistSchedulingEnabled;
}
@@ -716,29 +704,6 @@ public void setContainerExecutor(ContainerExecutor executor) {
}
/**
- * Class that keeps the context for containers queued at the NM.
- */
- public static class QueuingNMContext implements Context.QueuingContext {
- protected final ConcurrentMap
- queuedContainers = new ConcurrentSkipListMap<>();
-
- protected final ConcurrentMap
- killedQueuedContainers = new ConcurrentHashMap<>();
-
- @Override
- public ConcurrentMap
- getQueuedContainers() {
- return this.queuedContainers;
- }
-
- @Override
- public ConcurrentMap
- getKilledQueuedContainers() {
- return this.killedQueuedContainers;
- }
- }
-
- /**
* @return the node health checker
*/
public NodeHealthCheckerService getNodeHealthChecker() {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index f692bf1..2e4615d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -47,7 +47,6 @@
import org.apache.hadoop.util.VersionUtil;
import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest;
import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
@@ -62,7 +61,6 @@
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factories.impl.pb.RecordFactoryPBImpl;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
-import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.ServerRMProxy;
@@ -89,7 +87,6 @@
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
-import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.util.YarnVersionInfo;
@@ -472,7 +469,8 @@ protected NodeStatus getNodeStatus(int responseId) throws IOException {
private QueuedContainersStatus getQueuedContainerStatus() {
QueuedContainersStatus status = QueuedContainersStatus.newInstance();
status.setWaitQueueLength(
- this.context.getQueuingContext().getQueuedContainers().size());
+ this.context.getContainerManager().getContainerScheduler()
+ .getNumQueuedContainers());
return status;
}
/**
@@ -550,9 +548,6 @@ private void updateNMResource(Resource resource) {
}
}
- // Account for all containers that got killed while they were still queued.
- pendingCompletedContainers.putAll(getKilledQueuedContainerStatuses());
-
containerStatuses.addAll(pendingCompletedContainers.values());
if (LOG.isDebugEnabled()) {
@@ -562,43 +557,6 @@ private void updateNMResource(Resource resource) {
return containerStatuses;
}
- /**
- * Add to the container statuses the status of the containers that got killed
- * while they were queued.
- */
- private Map getKilledQueuedContainerStatuses() {
- Map killedQueuedContainerStatuses =
- new HashMap<>();
- for (Map.Entry killedQueuedContainer :
- this.context.getQueuingContext().
- getKilledQueuedContainers().entrySet()) {
- ContainerTokenIdentifier containerTokenId = killedQueuedContainer
- .getKey();
- ContainerId containerId = containerTokenId.getContainerID();
- ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
- containerId, ContainerState.COMPLETE,
- killedQueuedContainer.getValue(), ContainerExitStatus.ABORTED,
- containerTokenId.getResource(), containerTokenId.getExecutionType());
- ApplicationId applicationId = containerId.getApplicationAttemptId()
- .getApplicationId();
- if (isApplicationStopped(applicationId)) {
- if (LOG.isDebugEnabled()) {
- LOG.debug(applicationId + " is completing, " + " remove "
- + containerId + " from NM context.");
- }
- this.context.getQueuingContext().getKilledQueuedContainers()
- .remove(containerTokenId);
- killedQueuedContainerStatuses.put(containerId, containerStatus);
- } else {
- if (!isContainerRecentlyStopped(containerId)) {
- killedQueuedContainerStatuses.put(containerId, containerStatus);
- }
- }
- addCompletedContainer(containerId);
- }
- return killedQueuedContainerStatuses;
- }
-
private List getRunningApplications() {
List runningApplications = new ArrayList();
runningApplications.addAll(this.context.getApplications().keySet());
@@ -683,17 +641,6 @@ public void removeOrTrackCompletedContainersFromContext(
}
}
- // Remove null containers from queuing context for killed queued containers.
- Iterator killedQueuedContIter =
- context.getQueuingContext().getKilledQueuedContainers().keySet().
- iterator();
- while (killedQueuedContIter.hasNext()) {
- if (removedNullContainers.contains(
- killedQueuedContIter.next().getContainerID())) {
- killedQueuedContIter.remove();
- }
- }
-
if (!removedContainers.isEmpty()) {
LOG.info("Removed completed containers from NM context: "
+ removedContainers);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
index 0da02b3..0551a8c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java
@@ -25,6 +25,8 @@
import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor
.ContainersMonitor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler
+ .ContainerScheduler;
/**
* The ContainerManager is an entity that manages the life cycle of Containers.
@@ -39,4 +41,6 @@
void setBlockNewContainerRequests(boolean blockNewContainerRequests);
+ ContainerScheduler getContainerScheduler();
+
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index e8c2b75..ff27629 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -135,6 +135,9 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredApplicationsState;
@@ -204,6 +207,7 @@
private final WriteLock writeLock;
private AMRMProxyService amrmProxyService;
protected boolean amrmProxyEnabled = false;
+ private final ContainerScheduler containerScheduler;
private long waitForContainersOnShutdownMillis;
@@ -230,6 +234,8 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec,
addService(containersLauncher);
this.nodeStatusUpdater = nodeStatusUpdater;
+ this.containerScheduler = createContainerScheduler(context);
+ addService(containerScheduler);
// Start configurable services
auxiliaryServices = new AuxServices();
@@ -258,7 +264,8 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec,
dispatcher.register(AuxServicesEventType.class, auxiliaryServices);
dispatcher.register(ContainersMonitorEventType.class, containersMonitor);
dispatcher.register(ContainersLauncherEventType.class, containersLauncher);
-
+ dispatcher.register(ContainerSchedulerEventType.class, containerScheduler);
+
addService(dispatcher);
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
@@ -310,6 +317,10 @@ protected void createAMRMProxyService(Configuration conf) {
}
}
+ protected ContainerScheduler createContainerScheduler(Context cntxt) {
+ return new ContainerScheduler(cntxt);
+ }
+
protected ContainersMonitor createContainersMonitor(ContainerExecutor exec) {
return new ContainersMonitorImpl(exec, dispatcher, this.context);
}
@@ -1522,7 +1533,7 @@ protected boolean isServiceStopped() {
@Override
public void updateQueuingLimit(ContainerQueuingLimit queuingLimit) {
- LOG.trace("Implementation does not support queuing of Containers !!");
+ this.containerScheduler.updateQueuingLimit(queuingLimit);
}
@SuppressWarnings("unchecked")
@@ -1683,4 +1694,9 @@ private void internalSignalToContainer(SignalContainerRequest request,
LOG.info("Container " + containerId + " no longer exists");
}
}
+
+ @Override
+ public ContainerScheduler getContainerScheduler() {
+ return this.containerScheduler;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
index 78c240a..e5ee381 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
@@ -83,7 +83,13 @@
boolean isReInitializing();
+ boolean isMarkedToKill();
+
boolean canRollback();
void commitUpgrade();
+
+ void sendLaunchEvent();
+
+ void sendKillEvent(int exitStatus, String description);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 4bc0a0f..0f66fe0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -74,6 +74,8 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStopMonitoringEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerState;
@@ -164,6 +166,7 @@ private ReInitializationContext createContextForRollback() {
private String ips;
private volatile ReInitializationContext reInitContext;
private volatile boolean isReInitializing = false;
+ private volatile boolean isMarkedToKill = false;
/** The NM-wide configuration - not specific to this container */
private final Configuration daemonConf;
@@ -286,7 +289,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
// From NEW State
.addTransition(ContainerState.NEW,
EnumSet.of(ContainerState.LOCALIZING,
- ContainerState.LOCALIZED,
+ ContainerState.SCHEDULED,
ContainerState.LOCALIZATION_FAILED,
ContainerState.DONE),
ContainerEventType.INIT_CONTAINER, new RequestResourcesTransition())
@@ -298,7 +301,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
// From LOCALIZING State
.addTransition(ContainerState.LOCALIZING,
- EnumSet.of(ContainerState.LOCALIZING, ContainerState.LOCALIZED),
+ EnumSet.of(ContainerState.LOCALIZING, ContainerState.SCHEDULED),
ContainerEventType.RESOURCE_LOCALIZED, new LocalizedTransition())
.addTransition(ContainerState.LOCALIZING,
ContainerState.LOCALIZATION_FAILED,
@@ -309,7 +312,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
UPDATE_DIAGNOSTICS_TRANSITION)
.addTransition(ContainerState.LOCALIZING, ContainerState.KILLING,
ContainerEventType.KILL_CONTAINER,
- new KillDuringLocalizationTransition())
+ new KillBeforeRunningTransition())
// From LOCALIZATION_FAILED State
.addTransition(ContainerState.LOCALIZATION_FAILED,
@@ -334,17 +337,18 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
ContainerState.LOCALIZATION_FAILED,
ContainerEventType.RESOURCE_FAILED)
- // From LOCALIZED State
- .addTransition(ContainerState.LOCALIZED, ContainerState.RUNNING,
+ // From SCHEDULED State
+ .addTransition(ContainerState.SCHEDULED, ContainerState.RUNNING,
ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition())
- .addTransition(ContainerState.LOCALIZED, ContainerState.EXITED_WITH_FAILURE,
+ .addTransition(ContainerState.SCHEDULED, ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
new ExitedWithFailureTransition(true))
- .addTransition(ContainerState.LOCALIZED, ContainerState.LOCALIZED,
+ .addTransition(ContainerState.SCHEDULED, ContainerState.SCHEDULED,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
UPDATE_DIAGNOSTICS_TRANSITION)
- .addTransition(ContainerState.LOCALIZED, ContainerState.KILLING,
- ContainerEventType.KILL_CONTAINER, new KillTransition())
+ .addTransition(ContainerState.SCHEDULED, ContainerState.KILLING,
+ ContainerEventType.KILL_CONTAINER,
+ new KillBeforeRunningTransition())
// From RUNNING State
.addTransition(ContainerState.RUNNING,
@@ -353,7 +357,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
new ExitedWithSuccessTransition(true))
.addTransition(ContainerState.RUNNING,
EnumSet.of(ContainerState.RELAUNCHING,
- ContainerState.LOCALIZED,
+ ContainerState.SCHEDULED,
ContainerState.EXITED_WITH_FAILURE),
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
new RetryFailureTransition())
@@ -402,7 +406,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
.addTransition(ContainerState.REINITIALIZING, ContainerState.KILLING,
ContainerEventType.KILL_CONTAINER, new KillTransition())
.addTransition(ContainerState.REINITIALIZING,
- ContainerState.LOCALIZED,
+ ContainerState.SCHEDULED,
ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
new KilledForReInitializationTransition())
@@ -520,9 +524,11 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
case NEW:
case LOCALIZING:
case LOCALIZATION_FAILED:
- case LOCALIZED:
+ case SCHEDULED:
+ return org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED;
case RUNNING:
case RELAUNCHING:
+ case REINITIALIZING:
case EXITED_WITH_SUCCESS:
case EXITED_WITH_FAILURE:
case KILLING:
@@ -553,7 +559,7 @@ public String getUser() {
public Map> getLocalizedResources() {
this.readLock.lock();
try {
- if (ContainerState.LOCALIZED == getContainerState()
+ if (ContainerState.SCHEDULED == getContainerState()
|| ContainerState.RELAUNCHING == getContainerState()) {
return resourceSet.getLocalizedResources();
} else {
@@ -690,6 +696,9 @@ private void sendFinishedEvents() {
ContainerStatus containerStatus = cloneAndGetContainerStatus();
eventHandler.handle(new ApplicationContainerFinishedEvent(containerStatus));
+ // Tell the scheduler the container is Done
+ eventHandler.handle(new ContainerSchedulerEvent(this,
+ ContainerSchedulerEventType.CONTAINER_COMPLETED));
// Remove the container from the resource-monitor
eventHandler.handle(new ContainerStopMonitoringEvent(containerId));
// Tell the logService too
@@ -698,7 +707,8 @@ private void sendFinishedEvents() {
}
@SuppressWarnings("unchecked") // dispatcher not typed
- private void sendLaunchEvent() {
+ @Override
+ public void sendLaunchEvent() {
ContainersLauncherEventType launcherEvent =
ContainersLauncherEventType.LAUNCH_CONTAINER;
if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) {
@@ -711,6 +721,22 @@ private void sendLaunchEvent() {
}
@SuppressWarnings("unchecked") // dispatcher not typed
+ private void sendScheduleEvent() {
+ dispatcher.getEventHandler().handle(
+ new ContainerSchedulerEvent(this,
+ ContainerSchedulerEventType.SCHEDULE_CONTAINER)
+ );
+ }
+
+ @SuppressWarnings("unchecked") // dispatcher not typed
+ @Override
+ public void sendKillEvent(int exitStatus, String description) {
+ this.isMarkedToKill = true;
+ dispatcher.getEventHandler().handle(
+ new ContainerKillEvent(containerId, exitStatus, description));
+ }
+
+ @SuppressWarnings("unchecked") // dispatcher not typed
private void sendRelaunchEvent() {
ContainersLauncherEventType launcherEvent =
ContainersLauncherEventType.RELAUNCH_CONTAINER;
@@ -781,7 +807,7 @@ public void transition(ContainerImpl container, ContainerEvent event) {
* to the ResourceLocalizationManager and enters LOCALIZING state.
*
* If there are no resources to localize, sends LAUNCH_CONTAINER event
- * and enters LOCALIZED state directly.
+ * and enters SCHEDULED state directly.
*
* If there are any invalid resources specified, enters LOCALIZATION_FAILED
* directly.
@@ -847,9 +873,9 @@ public ContainerState transition(ContainerImpl container,
}
return ContainerState.LOCALIZING;
} else {
- container.sendLaunchEvent();
+ container.sendScheduleEvent();
container.metrics.endInitingContainer();
- return ContainerState.LOCALIZED;
+ return ContainerState.SCHEDULED;
}
}
}
@@ -889,7 +915,7 @@ public ContainerState transition(ContainerImpl container,
new ContainerLocalizationEvent(LocalizationEventType.
CONTAINER_RESOURCES_LOCALIZED, container));
- container.sendLaunchEvent();
+ container.sendScheduleEvent();
container.metrics.endInitingContainer();
// If this is a recovered container that has already launched, skip
@@ -909,7 +935,7 @@ public ContainerState transition(ContainerImpl container,
SharedCacheUploadEventType.UPLOAD));
}
- return ContainerState.LOCALIZED;
+ return ContainerState.SCHEDULED;
}
}
@@ -1099,7 +1125,7 @@ public void transition(ContainerImpl container, ContainerEvent event) {
}
/**
- * Transition from LOCALIZED state to RUNNING state upon receiving
+ * Transition from SCHEDULED state to RUNNING state upon receiving
* a CONTAINER_LAUNCHED event.
*/
static class LaunchTransition extends ContainerTransition {
@@ -1257,7 +1283,7 @@ public ContainerState transition(final ContainerImpl container,
container.containerId.getApplicationAttemptId().getApplicationId(),
container.containerId);
new KilledForReInitializationTransition().transition(container, event);
- return ContainerState.LOCALIZED;
+ return ContainerState.SCHEDULED;
} else {
new ExitedWithFailureTransition(true).transition(container, event);
return ContainerState.EXITED_WITH_FAILURE;
@@ -1339,7 +1365,7 @@ public void transition(ContainerImpl container,
}
/**
- * Transition to LOCALIZED and wait for RE-LAUNCH
+ * Transition to SCHEDULED and wait for RE-LAUNCH
*/
static class KilledForReInitializationTransition extends ContainerTransition {
@@ -1363,8 +1389,8 @@ public void transition(ContainerImpl container,
container.resourceSet =
container.reInitContext.mergedResourceSet(container.resourceSet);
-
- container.sendLaunchEvent();
+ container.isMarkedToKill = false;
+ container.sendScheduleEvent();
}
}
@@ -1392,7 +1418,7 @@ public void transition(ContainerImpl container, ContainerEvent event) {
* Transition from LOCALIZING to KILLING upon receiving
* KILL_CONTAINER event.
*/
- static class KillDuringLocalizationTransition implements
+ static class KillBeforeRunningTransition implements
SingleArcTransition {
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
@@ -1424,7 +1450,7 @@ public void transition(ContainerImpl container, ContainerEvent event) {
/**
* Transitions upon receiving KILL_CONTAINER.
- * - LOCALIZED -> KILLING.
+ * - SCHEDULED -> KILLING.
* - RUNNING -> KILLING.
* - REINITIALIZING -> KILLING.
*/
@@ -1646,7 +1672,8 @@ public void handle(ContainerEvent event) {
stateMachine.doTransition(event.getType(), event);
} catch (InvalidStateTransitionException e) {
LOG.warn("Can't handle this event at current state: Current: ["
- + oldState + "], eventType: [" + event.getType() + "]", e);
+ + oldState + "], eventType: [" + event.getType() + "]," +
+ " container: [" + containerID + "]", e);
}
if (oldState != newState) {
LOG.info("Container " + containerID + " transitioned from "
@@ -1710,6 +1737,11 @@ public boolean isReInitializing() {
}
@Override
+ public boolean isMarkedToKill() {
+ return this.isMarkedToKill;
+ }
+
+ @Override
public boolean canRollback() {
return (this.reInitContext != null)
&& (this.reInitContext.canRollback());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java
index 70de90c..91d1356 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java
@@ -19,7 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.container;
public enum ContainerState {
- NEW, LOCALIZING, LOCALIZATION_FAILED, LOCALIZED, RUNNING, RELAUNCHING,
+ NEW, LOCALIZING, LOCALIZATION_FAILED, SCHEDULED, RUNNING, RELAUNCHING,
REINITIALIZING, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, KILLING,
CONTAINER_CLEANEDUP_AFTER_KILL, CONTAINER_RESOURCES_CLEANINGUP, DONE
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index d8239ef..e07b808 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -107,6 +107,7 @@
protected AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false);
protected AtomicBoolean completed = new AtomicBoolean(false);
+ private volatile boolean killedBeforeStart = false;
private long sleepDelayBeforeSigKill = 250;
private long maxKillWaitTime = 2000;
@@ -401,7 +402,12 @@ protected boolean validateContainerState() {
@SuppressWarnings("unchecked")
protected int launchContainer(ContainerStartContext ctx) throws IOException {
ContainerId containerId = container.getContainerId();
-
+ if (container.isMarkedToKill()) {
+ LOG.info("Container " + containerId + " not launched as it has already "
+ + "been marked for Killing");
+ this.killedBeforeStart = true;
+ return ExitCode.TERMINATED.getExitCode();
+ }
// LaunchContainer is a blocking call. We are here almost means the
// container is launched, so send out the event.
dispatcher.getEventHandler().handle(new ContainerEvent(
@@ -451,10 +457,14 @@ protected void handleContainerExitCode(int exitCode, Path containerLogDir) {
|| exitCode == ExitCode.TERMINATED.getExitCode()) {
// If the process was killed, Send container_cleanedup_after_kill and
// just break out of this method.
- dispatcher.getEventHandler().handle(
- new ContainerExitEvent(containerId,
- ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode,
- diagnosticInfo.toString()));
+
+ // If Container was killed before starting... NO need to do this.
+ if (!killedBeforeStart) {
+ dispatcher.getEventHandler().handle(
+ new ContainerExitEvent(containerId,
+ ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode,
+ diagnosticInfo.toString()));
+ }
} else if (exitCode != 0) {
handleContainerExitWithFailure(containerId, exitCode, containerLogDir,
diagnosticInfo);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java
index 1069b4f..e2541d2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java
@@ -22,25 +22,12 @@
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.nodemanager.ResourceView;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo;
public interface ContainersMonitor extends Service,
EventHandler, ResourceView {
- public ResourceUtilization getContainersUtilization();
+ ResourceUtilization getContainersUtilization();
- ResourceUtilization getContainersAllocation();
-
- boolean hasResourcesAvailable(ProcessTreeInfo pti);
-
- void increaseContainersAllocation(ProcessTreeInfo pti);
-
- void decreaseContainersAllocation(ProcessTreeInfo pti);
-
- void increaseResourceUtilization(ResourceUtilization resourceUtil,
- ProcessTreeInfo pti);
-
- void decreaseResourceUtilization(ResourceUtilization resourceUtil,
- ProcessTreeInfo pti);
+ float getVmemRatio();
void subtractNodeResourcesFromResourceUtilization(
ResourceUtilization resourceUtil);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index a04a914..ead928f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -90,9 +90,6 @@
}
private ResourceUtilization containersUtilization;
- // Tracks the aggregated allocation of the currently allocated containers
- // when queuing of containers at the NMs is enabled.
- private ResourceUtilization containersAllocation;
private volatile boolean stopped = false;
@@ -107,7 +104,6 @@ public ContainersMonitorImpl(ContainerExecutor exec,
this.monitoringThread = new MonitoringThread();
this.containersUtilization = ResourceUtilization.newInstance(0, 0, 0.0f);
- this.containersAllocation = ResourceUtilization.newInstance(0, 0, 0.0f);
}
@Override
@@ -651,6 +647,8 @@ private void changeContainerResource(
LOG.warn("Container " + containerId.toString() + "does not exist");
return;
}
+ // TODO: Route this through the ContainerScheduler to
+ // fix containerAllocation
container.setResource(resource);
}
@@ -750,67 +748,6 @@ public void setContainersUtilization(ResourceUtilization utilization) {
this.containersUtilization = utilization;
}
- public ResourceUtilization getContainersAllocation() {
- return this.containersAllocation;
- }
-
- /**
- * @return true if there are available allocated resources for the given
- * container to start.
- */
- @Override
- public boolean hasResourcesAvailable(ProcessTreeInfo pti) {
- synchronized (this.containersAllocation) {
- // Check physical memory.
- if (this.containersAllocation.getPhysicalMemory() +
- (int) (pti.getPmemLimit() >> 20) >
- (int) (getPmemAllocatedForContainers() >> 20)) {
- return false;
- }
- // Check virtual memory.
- if (isVmemCheckEnabled() &&
- this.containersAllocation.getVirtualMemory() +
- (int) (pti.getVmemLimit() >> 20) >
- (int) (getVmemAllocatedForContainers() >> 20)) {
- return false;
- }
- // Check CPU.
- if (this.containersAllocation.getCPU()
- + allocatedCpuUsage(pti) > 1.0f) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public void increaseContainersAllocation(ProcessTreeInfo pti) {
- synchronized (this.containersAllocation) {
- increaseResourceUtilization(this.containersAllocation, pti);
- }
- }
-
- @Override
- public void decreaseContainersAllocation(ProcessTreeInfo pti) {
- synchronized (this.containersAllocation) {
- decreaseResourceUtilization(this.containersAllocation, pti);
- }
- }
-
- @Override
- public void increaseResourceUtilization(ResourceUtilization resourceUtil,
- ProcessTreeInfo pti) {
- resourceUtil.addTo((int) (pti.getPmemLimit() >> 20),
- (int) (pti.getVmemLimit() >> 20), allocatedCpuUsage(pti));
- }
-
- @Override
- public void decreaseResourceUtilization(ResourceUtilization resourceUtil,
- ProcessTreeInfo pti) {
- resourceUtil.subtractFrom((int) (pti.getPmemLimit() >> 20),
- (int) (pti.getVmemLimit() >> 20), allocatedCpuUsage(pti));
- }
-
@Override
public void subtractNodeResourcesFromResourceUtilization(
ResourceUtilization resourceUtil) {
@@ -818,14 +755,9 @@ public void subtractNodeResourcesFromResourceUtilization(
(int) (getVmemAllocatedForContainers() >> 20), 1.0f);
}
- /**
- * Calculates the vCores CPU usage that is assigned to the given
- * {@link ProcessTreeInfo}. In particular, it takes into account the number of
- * vCores that are allowed to be used by the NM and returns the CPU usage
- * as a normalized value between {@literal >=} 0 and {@literal <=} 1.
- */
- private float allocatedCpuUsage(ProcessTreeInfo pti) {
- return (float) pti.getCpuVcores() / getVCoresAllocatedForContainers();
+ @Override
+ public float getVmemRatio() {
+ return vmemRatio;
}
@Override
@@ -896,5 +828,4 @@ protected void onStartMonitoringContainer(
startEvent.getVmemLimit(), startEvent.getPmemLimit(),
startEvent.getCpuVcores()));
}
-
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java
deleted file mode 100644
index 5d2f4d4..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java
+++ /dev/null
@@ -1,654 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
-import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
-import org.apache.hadoop.yarn.api.records.ContainerStatus;
-import org.apache.hadoop.yarn.api.records.ExecutionType;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.api.records.ResourceUtilization;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.event.EventHandler;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
-import org.apache.hadoop.yarn.security.NMTokenIdentifier;
-import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit;
-import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
-import org.apache.hadoop.yarn.server.nodemanager.Context;
-import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
-import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
-import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo;
-import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
-import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerState;
-import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerStatus;
-import org.apache.hadoop.yarn.server.utils.BuilderUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.annotations.VisibleForTesting;
-
-/**
- * Class extending {@link ContainerManagerImpl} and is used when queuing at the
- * NM is enabled.
- */
-public class QueuingContainerManagerImpl extends ContainerManagerImpl {
-
- private static final Logger LOG = LoggerFactory
- .getLogger(QueuingContainerManagerImpl.class);
-
- private ConcurrentMap
- allocatedGuaranteedContainers;
- private ConcurrentMap
- allocatedOpportunisticContainers;
-
- private Queue queuedGuaranteedContainers;
- private Queue queuedOpportunisticContainers;
-
- private Set opportunisticContainersToKill;
- private final ContainerQueuingLimit queuingLimit;
-
- public QueuingContainerManagerImpl(Context context, ContainerExecutor exec,
- DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
- NodeManagerMetrics metrics, LocalDirsHandlerService dirsHandler) {
- super(context, exec, deletionContext, nodeStatusUpdater, metrics,
- dirsHandler);
- this.allocatedGuaranteedContainers = new ConcurrentHashMap<>();
- this.allocatedOpportunisticContainers = new ConcurrentHashMap<>();
- this.queuedGuaranteedContainers = new ConcurrentLinkedQueue<>();
- this.queuedOpportunisticContainers = new ConcurrentLinkedQueue<>();
- this.opportunisticContainersToKill = Collections.synchronizedSet(
- new HashSet());
- this.queuingLimit = ContainerQueuingLimit.newInstance();
- }
-
- @Override
- protected EventHandler createApplicationEventDispatcher() {
- return new QueuingApplicationEventDispatcher(
- super.createApplicationEventDispatcher());
- }
-
- @Override
- protected void startContainerInternal(
- ContainerTokenIdentifier containerTokenIdentifier,
- StartContainerRequest request) throws YarnException, IOException {
- this.context.getQueuingContext().getQueuedContainers().put(
- containerTokenIdentifier.getContainerID(), containerTokenIdentifier);
-
- AllocatedContainerInfo allocatedContInfo = new AllocatedContainerInfo(
- containerTokenIdentifier, request,
- containerTokenIdentifier.getExecutionType(), containerTokenIdentifier
- .getResource(), getConfig());
-
- // If there are already free resources for the container to start, and
- // there are no queued containers waiting to be executed, start this
- // container immediately.
- if (queuedGuaranteedContainers.isEmpty() &&
- queuedOpportunisticContainers.isEmpty() &&
- getContainersMonitor().
- hasResourcesAvailable(allocatedContInfo.getPti())) {
- startAllocatedContainer(allocatedContInfo);
- } else {
- ContainerId cIdToStart = containerTokenIdentifier.getContainerID();
- this.context.getNMStateStore().storeContainer(cIdToStart,
- containerTokenIdentifier.getVersion(), request);
- this.context.getNMStateStore().storeContainerQueued(cIdToStart);
- LOG.info("No available resources for container {} to start its execution "
- + "immediately.", cIdToStart);
- if (allocatedContInfo.getExecutionType() == ExecutionType.GUARANTEED) {
- queuedGuaranteedContainers.add(allocatedContInfo);
- // Kill running opportunistic containers to make space for
- // guaranteed container.
- killOpportunisticContainers(allocatedContInfo);
- } else {
- LOG.info("Opportunistic container {} will be queued at the NM.",
- cIdToStart);
- queuedOpportunisticContainers.add(allocatedContInfo);
- }
- }
- }
-
- @Override
- protected void stopContainerInternal(ContainerId containerID)
- throws YarnException, IOException {
- Container container = this.context.getContainers().get(containerID);
- // If container is null and distributed scheduling is enabled, container
- // might be queued. Otherwise, container might not be handled by this NM.
- if (container == null && this.context.getQueuingContext()
- .getQueuedContainers().containsKey(containerID)) {
- ContainerTokenIdentifier containerTokenId = this.context
- .getQueuingContext().getQueuedContainers().remove(containerID);
-
- boolean foundInQueue = removeQueuedContainer(containerID,
- containerTokenId.getExecutionType());
-
- if (foundInQueue) {
- LOG.info("Removing queued container with ID " + containerID);
- this.context.getQueuingContext().getKilledQueuedContainers().put(
- containerTokenId,
- "Queued container request removed by ApplicationMaster.");
- this.context.getNMStateStore().storeContainerKilled(containerID);
- } else {
- // The container started execution in the meanwhile.
- try {
- stopContainerInternalIfRunning(containerID);
- } catch (YarnException | IOException e) {
- LOG.error("Container did not get removed successfully.", e);
- }
- }
-
- nodeStatusUpdater.sendOutofBandHeartBeat();
- } else {
- super.stopContainerInternal(containerID);
- }
- }
-
- /**
- * Start the execution of the given container. Also add it to the allocated
- * containers, and update allocated resource utilization.
- */
- private void startAllocatedContainer(
- AllocatedContainerInfo allocatedContainerInfo) {
- ProcessTreeInfo pti = allocatedContainerInfo.getPti();
-
- if (allocatedContainerInfo.getExecutionType() ==
- ExecutionType.GUARANTEED) {
- allocatedGuaranteedContainers.put(pti.getContainerId(),
- allocatedContainerInfo);
- } else {
- allocatedOpportunisticContainers.put(pti.getContainerId(),
- allocatedContainerInfo);
- }
-
- getContainersMonitor().increaseContainersAllocation(pti);
-
- // Start execution of container.
- ContainerId containerId = allocatedContainerInfo
- .getContainerTokenIdentifier().getContainerID();
- this.context.getQueuingContext().getQueuedContainers().remove(containerId);
- try {
- LOG.info("Starting container [" + containerId + "]");
- super.startContainerInternal(
- allocatedContainerInfo.getContainerTokenIdentifier(),
- allocatedContainerInfo.getStartRequest());
- } catch (YarnException | IOException e) {
- containerFailedToStart(pti.getContainerId(),
- allocatedContainerInfo.getContainerTokenIdentifier());
- LOG.error("Container failed to start.", e);
- }
- }
-
- private void containerFailedToStart(ContainerId containerId,
- ContainerTokenIdentifier containerTokenId) {
- this.context.getQueuingContext().getQueuedContainers().remove(containerId);
-
- removeAllocatedContainer(containerId);
-
- this.context.getQueuingContext().getKilledQueuedContainers().put(
- containerTokenId,
- "Container removed from queue as it failed to start.");
- }
-
- /**
- * Remove the given container from the container queues.
- *
- * @return true if the container was found in one of the queues.
- */
- private boolean removeQueuedContainer(ContainerId containerId,
- ExecutionType executionType) {
- Queue queue =
- (executionType == ExecutionType.GUARANTEED) ?
- queuedGuaranteedContainers : queuedOpportunisticContainers;
-
- boolean foundInQueue = false;
- Iterator iter = queue.iterator();
- while (iter.hasNext() && !foundInQueue) {
- if (iter.next().getPti().getContainerId().equals(containerId)) {
- iter.remove();
- foundInQueue = true;
- }
- }
-
- return foundInQueue;
- }
-
- /**
- * Remove the given container from the allocated containers, and update
- * allocated container utilization accordingly.
- */
- private void removeAllocatedContainer(ContainerId containerId) {
- AllocatedContainerInfo contToRemove = null;
-
- contToRemove = allocatedGuaranteedContainers.remove(containerId);
-
- if (contToRemove == null) {
- contToRemove = allocatedOpportunisticContainers.remove(containerId);
- }
-
- // If container was indeed running, update allocated resource utilization.
- if (contToRemove != null) {
- getContainersMonitor().decreaseContainersAllocation(contToRemove
- .getPti());
- }
- }
-
- /**
- * Stop a container only if it is currently running. If queued, do not stop
- * it.
- */
- private void stopContainerInternalIfRunning(ContainerId containerID)
- throws YarnException, IOException {
- if (this.context.getContainers().containsKey(containerID)) {
- stopContainerInternal(containerID);
- }
- }
-
- /**
- * Kill opportunistic containers to free up resources for running the given
- * container.
- *
- * @param allocatedContInfo
- * the container whose execution needs to start by freeing up
- * resources occupied by opportunistic containers.
- */
- private void killOpportunisticContainers(
- AllocatedContainerInfo allocatedContInfo) {
- ContainerId containerToStartId = allocatedContInfo.getPti()
- .getContainerId();
- List extraOpportContainersToKill =
- pickOpportunisticContainersToKill(containerToStartId);
-
- // Kill the opportunistic containers that were chosen.
- for (ContainerId contIdToKill : extraOpportContainersToKill) {
- try {
- stopContainerInternalIfRunning(contIdToKill);
- } catch (YarnException | IOException e) {
- LOG.error("Container did not get removed successfully.", e);
- }
- LOG.info(
- "Opportunistic container {} will be killed in order to start the "
- + "execution of guaranteed container {}.",
- contIdToKill, containerToStartId);
- }
- }
-
- /**
- * Choose the opportunistic containers to kill in order to free up resources
- * for running the given container.
- *
- * @param containerToStartId
- * the container whose execution needs to start by freeing up
- * resources occupied by opportunistic containers.
- * @return the additional opportunistic containers that need to be killed.
- */
- protected List pickOpportunisticContainersToKill(
- ContainerId containerToStartId) {
- // The additional opportunistic containers that need to be killed for the
- // given container to start.
- List extraOpportContainersToKill = new ArrayList<>();
- // Track resources that need to be freed.
- ResourceUtilization resourcesToFreeUp = resourcesToFreeUp(
- containerToStartId);
-
- // Go over the running opportunistic containers. Avoid containers that have
- // already been marked for killing.
- boolean hasSufficientResources = false;
- for (Map.Entry runningOpportCont :
- allocatedOpportunisticContainers.entrySet()) {
- ContainerId runningOpportContId = runningOpportCont.getKey();
-
- // If there are sufficient resources to execute the given container, do
- // not kill more opportunistic containers.
- if (resourcesToFreeUp.getPhysicalMemory() <= 0 &&
- resourcesToFreeUp.getVirtualMemory() <= 0 &&
- resourcesToFreeUp.getCPU() <= 0.0f) {
- hasSufficientResources = true;
- break;
- }
-
- if (!opportunisticContainersToKill.contains(runningOpportContId)) {
- extraOpportContainersToKill.add(runningOpportContId);
- opportunisticContainersToKill.add(runningOpportContId);
- getContainersMonitor().decreaseResourceUtilization(resourcesToFreeUp,
- runningOpportCont.getValue().getPti());
- }
- }
-
- if (!hasSufficientResources) {
- LOG.info(
- "There are no sufficient resources to start guaranteed {} even after "
- + "attempting to kill any running opportunistic containers.",
- containerToStartId);
- }
-
- return extraOpportContainersToKill;
- }
-
- /**
- * Calculates the amount of resources that need to be freed up (by killing
- * opportunistic containers) in order for the given guaranteed container to
- * start its execution. Resource allocation to be freed up =
- * containersAllocation -
- * allocation of opportunisticContainersToKill +
- * allocation of queuedGuaranteedContainers that will start
- * before the given container +
- * allocation of given container -
- * total resources of node.
- *
- * @param containerToStartId
- * the ContainerId of the guaranteed container for which we need to
- * free resources, so that its execution can start.
- * @return the resources that need to be freed up for the given guaranteed
- * container to start.
- */
- private ResourceUtilization resourcesToFreeUp(
- ContainerId containerToStartId) {
- // Get allocation of currently allocated containers.
- ResourceUtilization resourceAllocationToFreeUp = ResourceUtilization
- .newInstance(getContainersMonitor().getContainersAllocation());
-
- // Subtract from the allocation the allocation of the opportunistic
- // containers that are marked for killing.
- for (ContainerId opportContId : opportunisticContainersToKill) {
- if (allocatedOpportunisticContainers.containsKey(opportContId)) {
- getContainersMonitor().decreaseResourceUtilization(
- resourceAllocationToFreeUp,
- allocatedOpportunisticContainers.get(opportContId).getPti());
- }
- }
- // Add to the allocation the allocation of the pending guaranteed
- // containers that will start before the current container will be started.
- for (AllocatedContainerInfo guarContInfo : queuedGuaranteedContainers) {
- getContainersMonitor().increaseResourceUtilization(
- resourceAllocationToFreeUp, guarContInfo.getPti());
- if (guarContInfo.getPti().getContainerId().equals(containerToStartId)) {
- break;
- }
- }
- // Subtract the overall node resources.
- getContainersMonitor().subtractNodeResourcesFromResourceUtilization(
- resourceAllocationToFreeUp);
- return resourceAllocationToFreeUp;
- }
-
- /**
- * If there are available resources, try to start as many pending containers
- * as possible.
- */
- private void startPendingContainers() {
- // Start pending guaranteed containers, if resources available.
- boolean resourcesAvailable =
- startContainersFromQueue(queuedGuaranteedContainers);
-
- // Start opportunistic containers, if resources available.
- if (resourcesAvailable) {
- startContainersFromQueue(queuedOpportunisticContainers);
- }
- }
-
- private boolean startContainersFromQueue(
- Queue queuedContainers) {
- Iterator guarIter = queuedContainers.iterator();
- boolean resourcesAvailable = true;
-
- while (guarIter.hasNext() && resourcesAvailable) {
- AllocatedContainerInfo allocatedContInfo = guarIter.next();
-
- if (getContainersMonitor().hasResourcesAvailable(
- allocatedContInfo.getPti())) {
- startAllocatedContainer(allocatedContInfo);
- guarIter.remove();
- } else {
- resourcesAvailable = false;
- }
- }
- return resourcesAvailable;
- }
-
- @Override
- protected ContainerStatus getContainerStatusInternal(ContainerId containerID,
- NMTokenIdentifier nmTokenIdentifier) throws YarnException {
- Container container = this.context.getContainers().get(containerID);
- if (container == null) {
- ContainerTokenIdentifier containerTokenId = this.context
- .getQueuingContext().getQueuedContainers().get(containerID);
- if (containerTokenId != null) {
- ExecutionType executionType = this.context.getQueuingContext()
- .getQueuedContainers().get(containerID).getExecutionType();
- return BuilderUtils.newContainerStatus(containerID,
- org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, "",
- ContainerExitStatus.INVALID, this.context.getQueuingContext()
- .getQueuedContainers().get(containerID).getResource(),
- executionType);
- } else {
- // Check if part of the stopped/killed queued containers.
- for (ContainerTokenIdentifier cTokenId : this.context
- .getQueuingContext().getKilledQueuedContainers().keySet()) {
- if (cTokenId.getContainerID().equals(containerID)) {
- return BuilderUtils.newContainerStatus(containerID,
- org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE,
- this.context.getQueuingContext().getKilledQueuedContainers()
- .get(cTokenId), ContainerExitStatus.ABORTED, cTokenId
- .getResource(), cTokenId.getExecutionType());
- }
- }
- }
- }
- return super.getContainerStatusInternal(containerID, nmTokenIdentifier);
- }
-
- /**
- * Recover running or queued container.
- */
- @Override
- protected void recoverActiveContainer(
- ContainerLaunchContext launchContext, ContainerTokenIdentifier token,
- RecoveredContainerState rcs) throws IOException {
- if (rcs.getStatus() ==
- RecoveredContainerStatus.QUEUED && !rcs.getKilled()) {
- LOG.info(token.getContainerID()
- + "will be added to the queued containers.");
-
- AllocatedContainerInfo allocatedContInfo = new AllocatedContainerInfo(
- token, rcs.getStartRequest(), token.getExecutionType(),
- token.getResource(), getConfig());
-
- this.context.getQueuingContext().getQueuedContainers().put(
- token.getContainerID(), token);
-
- if (allocatedContInfo.getExecutionType() == ExecutionType.GUARANTEED) {
- queuedGuaranteedContainers.add(allocatedContInfo);
- // Kill running opportunistic containers to make space for
- // guaranteed container.
- killOpportunisticContainers(allocatedContInfo);
- } else {
- queuedOpportunisticContainers.add(allocatedContInfo);
- }
- } else {
- super.recoverActiveContainer(launchContext, token, rcs);
- }
- }
-
- @VisibleForTesting
- public int getNumAllocatedGuaranteedContainers() {
- return allocatedGuaranteedContainers.size();
- }
-
- @VisibleForTesting
- public int getNumAllocatedOpportunisticContainers() {
- return allocatedOpportunisticContainers.size();
- }
-
- @VisibleForTesting
- public int getNumQueuedGuaranteedContainers() {
- return queuedGuaranteedContainers.size();
- }
-
- @VisibleForTesting
- public int getNumQueuedOpportunisticContainers() {
- return queuedOpportunisticContainers.size();
- }
-
- class QueuingApplicationEventDispatcher implements
- EventHandler {
- private EventHandler applicationEventDispatcher;
-
- public QueuingApplicationEventDispatcher(
- EventHandler applicationEventDispatcher) {
- this.applicationEventDispatcher = applicationEventDispatcher;
- }
-
- @Override
- public void handle(ApplicationEvent event) {
- if (event.getType() ==
- ApplicationEventType.APPLICATION_CONTAINER_FINISHED) {
- if (!(event instanceof ApplicationContainerFinishedEvent)) {
- throw new RuntimeException("Unexpected event type: " + event);
- }
- ApplicationContainerFinishedEvent finishEvent =
- (ApplicationContainerFinishedEvent) event;
- // Remove finished container from the allocated containers, and
- // attempt to start new containers.
- ContainerId contIdToRemove = finishEvent.getContainerID();
- removeAllocatedContainer(contIdToRemove);
- opportunisticContainersToKill.remove(contIdToRemove);
- startPendingContainers();
- }
- this.applicationEventDispatcher.handle(event);
- }
- }
-
- @Override
- public void updateQueuingLimit(ContainerQueuingLimit limit) {
- this.queuingLimit.setMaxQueueLength(limit.getMaxQueueLength());
- // TODO: Include wait time as well once it is implemented
- if (this.queuingLimit.getMaxQueueLength() > -1) {
- shedQueuedOpportunisticContainers();
- }
- }
-
- private void shedQueuedOpportunisticContainers() {
- int numAllowed = this.queuingLimit.getMaxQueueLength();
- Iterator containerIter =
- queuedOpportunisticContainers.iterator();
- while (containerIter.hasNext()) {
- AllocatedContainerInfo cInfo = containerIter.next();
- if (numAllowed <= 0) {
- containerIter.remove();
- ContainerTokenIdentifier containerTokenIdentifier = this.context
- .getQueuingContext().getQueuedContainers().remove(
- cInfo.getContainerTokenIdentifier().getContainerID());
- // The Container might have already started while we were
- // iterating..
- if (containerTokenIdentifier != null) {
- this.context.getQueuingContext().getKilledQueuedContainers()
- .putIfAbsent(cInfo.getContainerTokenIdentifier(),
- "Container de-queued to meet NM queuing limits. "
- + "Max Queue length["
- + this.queuingLimit.getMaxQueueLength() + "]");
- }
- }
- numAllowed--;
- }
- }
-
-
- static class AllocatedContainerInfo {
- private final ContainerTokenIdentifier containerTokenIdentifier;
- private final StartContainerRequest startRequest;
- private final ExecutionType executionType;
- private final ProcessTreeInfo pti;
-
- AllocatedContainerInfo(ContainerTokenIdentifier containerTokenIdentifier,
- StartContainerRequest startRequest, ExecutionType executionType,
- Resource resource, Configuration conf) {
- this.containerTokenIdentifier = containerTokenIdentifier;
- this.startRequest = startRequest;
- this.executionType = executionType;
- this.pti = createProcessTreeInfo(containerTokenIdentifier
- .getContainerID(), resource, conf);
- }
-
- private ContainerTokenIdentifier getContainerTokenIdentifier() {
- return this.containerTokenIdentifier;
- }
-
- private StartContainerRequest getStartRequest() {
- return this.startRequest;
- }
-
- private ExecutionType getExecutionType() {
- return this.executionType;
- }
-
- protected ProcessTreeInfo getPti() {
- return this.pti;
- }
-
- private ProcessTreeInfo createProcessTreeInfo(ContainerId containerId,
- Resource resource, Configuration conf) {
- long pmemBytes = resource.getMemorySize() * 1024 * 1024L;
- float pmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
- YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
- long vmemBytes = (long) (pmemRatio * pmemBytes);
- int cpuVcores = resource.getVirtualCores();
-
- return new ProcessTreeInfo(containerId, null, null, vmemBytes, pmemBytes,
- cpuVcores);
- }
-
- @Override
- public boolean equals(Object obj) {
- boolean equal = false;
- if (obj instanceof AllocatedContainerInfo) {
- AllocatedContainerInfo otherContInfo = (AllocatedContainerInfo) obj;
- equal = this.getPti().getContainerId()
- .equals(otherContInfo.getPti().getContainerId());
- }
- return equal;
- }
-
- @Override
- public int hashCode() {
- return this.getPti().getContainerId().hashCode();
- }
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/package-info.java
deleted file mode 100644
index 0250807..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * This package contains classes related to the queuing of containers at
- * the NM.
- *
- */
-package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java
new file mode 100644
index 0000000..4c39ce1
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java
@@ -0,0 +1,351 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.api.records.ResourceUtilization;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The ContainerScheduler manages a collection of runnable containers. It
+ * ensures that a container is launched only if all it launch criteria are
+ * met. It also ensures that OPPORTUNISTIC containers are killed to make
+ * room for GUARANTEED containers.
+ */
+public class ContainerScheduler extends AbstractService implements
+ EventHandler {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ContainerScheduler.class);
+
+ private final Context context;
+ private final int maxOppQueueLength;
+
+ // Queue of Guaranteed Containers waiting for resources to run
+ private final LinkedHashMap
+ queuedGuaranteedContainers = new LinkedHashMap<>();
+ // Queue of Opportunistic Containers waiting for resources to run
+ private final LinkedHashMap
+ queuedOpportunisticContainers = new LinkedHashMap<>();
+
+ // Used to keep track of containers that have been marked to be killed
+ // to make room for a guaranteed container.
+ private final Map oppContainersMarkedForKill =
+ new HashMap<>();
+
+ // Containers launched by the Scheduler will take a while to actually
+ // move to the RUNNING state, but should still be fair game for killing
+ // by the scheduler to make room for guaranteed containers.
+ private final LinkedHashMap runningContainers =
+ new LinkedHashMap<>();
+
+ private final ContainerQueuingLimit queuingLimit =
+ ContainerQueuingLimit.newInstance();
+
+ //
+ private ResourceUtilizationManager utilizationManager;
+
+ /**
+ * Instantiate a Container Scheduler.
+ * @param context NodeManager Context.
+ */
+ public ContainerScheduler(Context context) {
+ this(context, context.getConf().getInt(
+ YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH,
+ YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH_DEFAULT));
+ }
+
+ @VisibleForTesting
+ public ContainerScheduler(Context context, int qLength) {
+ super(ContainerScheduler.class.getName());
+ this.context = context;
+ this.maxOppQueueLength = (qLength <= 0) ? 0 : qLength;
+ this.utilizationManager = new ResourceUtilizationManager(this);
+ }
+
+ /**
+ * Handle ContainerSchedulerEvents.
+ * @param event ContainerSchedulerEvent.
+ */
+ @Override
+ public void handle(ContainerSchedulerEvent event) {
+ switch (event.getType()) {
+ case SCHEDULE_CONTAINER:
+ scheduleContainer(event.getContainer());
+ break;
+ case CONTAINER_COMPLETED:
+ onContainerCompleted(event.getContainer());
+ break;
+ default:
+ LOG.error("Unknown event arrived at ContainerScheduler: "
+ + event.toString());
+ }
+ }
+
+ /**
+ * Return number of queued containers.
+ * @return Number of queued containers.
+ */
+ public int getNumQueuedContainers() {
+ return this.queuedGuaranteedContainers.size()
+ + this.queuedOpportunisticContainers.size();
+ }
+
+ @VisibleForTesting
+ public int getNumQueuedGuaranteedContainers() {
+ return this.queuedGuaranteedContainers.size();
+ }
+
+ @VisibleForTesting
+ public int getNumQueuedOpportunisticContainers() {
+ return this.queuedOpportunisticContainers.size();
+ }
+
+ private void onContainerCompleted(Container container) {
+ // decrement only if it was a running container
+ if (runningContainers.containsKey(container.getContainerId())) {
+ this.utilizationManager.subtractContainerResource(container);
+ }
+ runningContainers.remove(container.getContainerId());
+ oppContainersMarkedForKill.remove(container.getContainerId());
+ startPendingContainers();
+ }
+
+ private void startPendingContainers() {
+ // Start pending guaranteed containers, if resources available.
+ boolean resourcesAvailable =
+ startContainersFromQueue(queuedGuaranteedContainers.values());
+ // Start opportunistic containers, if resources available.
+ if (resourcesAvailable) {
+ startContainersFromQueue(queuedOpportunisticContainers.values());
+ }
+ }
+
+ private boolean startContainersFromQueue(
+ Collection queuedContainers) {
+ Iterator cIter = queuedContainers.iterator();
+ boolean resourcesAvailable = true;
+ while (cIter.hasNext() && resourcesAvailable) {
+ Container container = cIter.next();
+ if (this.utilizationManager.hasResourcesAvailable(container)) {
+ startAllocatedContainer(container);
+ cIter.remove();
+ } else {
+ resourcesAvailable = false;
+ }
+ }
+ return resourcesAvailable;
+ }
+
+ @VisibleForTesting
+ protected void scheduleContainer(Container container) {
+ if (maxOppQueueLength <= 0) {
+ startAllocatedContainer(container);
+ return;
+ }
+ if (queuedGuaranteedContainers.isEmpty() &&
+ queuedOpportunisticContainers.isEmpty() &&
+ this.utilizationManager.hasResourcesAvailable(container)) {
+ startAllocatedContainer(container);
+ } else {
+ try {
+ this.context.getNMStateStore().storeContainerQueued(
+ container.getContainerId());
+ } catch (IOException e) {
+ LOG.warn("Could not store container state into store..", e);
+ }
+ LOG.info("No available resources for container {} to start its execution "
+ + "immediately.", container.getContainerId());
+ if (container.getContainerTokenIdentifier().getExecutionType() ==
+ ExecutionType.GUARANTEED) {
+ queuedGuaranteedContainers.put(container.getContainerId(), container);
+ // Kill running opportunistic containers to make space for
+ // guaranteed container.
+ killOpportunisticContainers(container);
+ } else {
+ if (queuedOpportunisticContainers.size() <= maxOppQueueLength) {
+ LOG.info("Opportunistic container {} will be queued at the NM.",
+ container.getContainerId());
+ queuedOpportunisticContainers.put(
+ container.getContainerId(), container);
+ } else {
+ LOG.info("Opportunistic container [{}] will not be queued at the NM" +
+ "since max queue length [{}] has been reached",
+ container.getContainerId(), maxOppQueueLength);
+ container.sendKillEvent(
+ ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER,
+ "Opportunistic container queue is full.");
+ }
+ }
+ }
+ }
+
+ private void killOpportunisticContainers(Container container) {
+ List extraOpportContainersToKill =
+ pickOpportunisticContainersToKill(container.getContainerId());
+ // Kill the opportunistic containers that were chosen.
+ for (Container contToKill : extraOpportContainersToKill) {
+ contToKill.sendKillEvent(
+ ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER,
+ "Container Killed to make room for Guaranteed Container.");
+ oppContainersMarkedForKill.put(contToKill.getContainerId(), contToKill);
+ LOG.info(
+ "Opportunistic container {} will be killed in order to start the "
+ + "execution of guaranteed container {}.",
+ contToKill.getContainerId(), container.getContainerId());
+ }
+ }
+
+ private void startAllocatedContainer(Container container) {
+ LOG.info("Starting container [" + container.getContainerId()+ "]");
+ runningContainers.put(container.getContainerId(), container);
+ this.utilizationManager.addContainerResources(container);
+ container.sendLaunchEvent();
+ }
+
+ private List pickOpportunisticContainersToKill(
+ ContainerId containerToStartId) {
+ // The additional opportunistic containers that need to be killed for the
+ // given container to start.
+ List extraOpportContainersToKill = new ArrayList<>();
+ // Track resources that need to be freed.
+ ResourceUtilization resourcesToFreeUp = resourcesToFreeUp(
+ containerToStartId);
+
+ // Go over the running opportunistic containers.
+ // Use a descending iterator to kill more recently started containers.
+ Iterator reverseContainerIterator =
+ new LinkedList<>(runningContainers.values()).descendingIterator();
+ while(reverseContainerIterator.hasNext() &&
+ !hasSufficientResources(resourcesToFreeUp)) {
+ Container runningCont = reverseContainerIterator.next();
+ if (runningCont.getContainerTokenIdentifier().getExecutionType() ==
+ ExecutionType.OPPORTUNISTIC) {
+
+ if (oppContainersMarkedForKill.containsKey(
+ runningCont.getContainerId())) {
+ // These containers have already been marked to be killed.
+ // So exclude them..
+ continue;
+ }
+ extraOpportContainersToKill.add(runningCont);
+ ResourceUtilizationManager.decreaseResourceUtilization(
+ getContainersMonitor(), resourcesToFreeUp,
+ runningCont.getResource());
+ }
+ }
+ if (!hasSufficientResources(resourcesToFreeUp)) {
+ LOG.warn("There are no sufficient resources to start guaranteed [{}]" +
+ "even after attempting to kill all running" +
+ "opportunistic containers.", containerToStartId);
+ }
+ return extraOpportContainersToKill;
+ }
+
+ private boolean hasSufficientResources(
+ ResourceUtilization resourcesToFreeUp) {
+ return resourcesToFreeUp.getPhysicalMemory() <= 0 &&
+ resourcesToFreeUp.getVirtualMemory() <= 0 &&
+ resourcesToFreeUp.getCPU() <= 0.0f;
+ }
+
+ private ResourceUtilization resourcesToFreeUp(
+ ContainerId containerToStartId) {
+ // Get allocation of currently allocated containers.
+ ResourceUtilization resourceAllocationToFreeUp = ResourceUtilization
+ .newInstance(this.utilizationManager.getCurrentUtilization());
+
+ // Add to the allocation the allocation of the pending guaranteed
+ // containers that will start before the current container will be started.
+ for (Container container : queuedGuaranteedContainers.values()) {
+ ResourceUtilizationManager.increaseResourceUtilization(
+ getContainersMonitor(), resourceAllocationToFreeUp,
+ container.getResource());
+ if (container.getContainerId().equals(containerToStartId)) {
+ break;
+ }
+ }
+
+ // These Resources have already been freed, due to demand from an
+ // earlier Guaranteed container.
+ for (Container container : oppContainersMarkedForKill.values()) {
+ ResourceUtilizationManager.decreaseResourceUtilization(
+ getContainersMonitor(), resourceAllocationToFreeUp,
+ container.getResource());
+ }
+
+ // Subtract the overall node resources.
+ getContainersMonitor().subtractNodeResourcesFromResourceUtilization(
+ resourceAllocationToFreeUp);
+ return resourceAllocationToFreeUp;
+ }
+
+ public void updateQueuingLimit(ContainerQueuingLimit limit) {
+ this.queuingLimit.setMaxQueueLength(limit.getMaxQueueLength());
+ // TODO: Include wait time as well once it is implemented
+ if (this.queuingLimit.getMaxQueueLength() > -1) {
+ shedQueuedOpportunisticContainers();
+ }
+ }
+
+ private void shedQueuedOpportunisticContainers() {
+ int numAllowed = this.queuingLimit.getMaxQueueLength();
+ Iterator containerIter =
+ queuedOpportunisticContainers.values().iterator();
+ while (containerIter.hasNext()) {
+ Container container = containerIter.next();
+ if (numAllowed <= 0) {
+ container.sendKillEvent(
+ ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER,
+ "Container Killed to make room for Guaranteed Container.");
+ containerIter.remove();
+ LOG.info(
+ "Opportunistic container {} will be killed to meet NM queuing" +
+ " limits.", container.getContainerId());
+ }
+ numAllowed--;
+ }
+ }
+
+ public ContainersMonitor getContainersMonitor() {
+ return this.context.getContainerManager().getContainersMonitor();
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEvent.java
new file mode 100644
index 0000000..460aaeb
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEvent.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
+
+import org.apache.hadoop.yarn.event.AbstractEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container
+ .Container;
+
+/**
+ * Events consumed by the {@link ContainerScheduler}.
+ */
+public class ContainerSchedulerEvent extends
+ AbstractEvent {
+
+ private final Container container;
+
+ /**
+ * Create instance of Event.
+ * @param container Container.
+ * @param eventType EventType.
+ */
+ public ContainerSchedulerEvent(Container container,
+ ContainerSchedulerEventType eventType) {
+ super(eventType);
+ this.container = container;
+ }
+
+ /**
+ * Get the container associated with the event.
+ * @return Container.
+ */
+ public Container getContainer() {
+ return container;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java
new file mode 100644
index 0000000..ade1c3e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
+
+/**
+ * Event types associated with {@link ContainerSchedulerEvent}.
+ */
+public enum ContainerSchedulerEventType {
+ SCHEDULE_CONTAINER,
+ CONTAINER_COMPLETED,
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ResourceUtilizationManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ResourceUtilizationManager.java
new file mode 100644
index 0000000..66c270f
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ResourceUtilizationManager.java
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceUtilization;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class abstracts out how a container contributes to Resource Utilization.
+ * It is used by the {@link ContainerScheduler} to determine which
+ * OPPORTUNISTIC containers to kill in order to make room for a GUARANTEED
+ * container.
+ * It currently equates resource utilization with the total resource allocated
+ * to the container. Another implementation might choose to use the actual
+ * resource utilization.
+ */
+
+public class ResourceUtilizationManager {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ResourceUtilizationManager.class);
+
+ private ResourceUtilization containersAllocation;
+ private ContainerScheduler scheduler;
+
+ ResourceUtilizationManager(ContainerScheduler scheduler) {
+ this.containersAllocation = ResourceUtilization.newInstance(0, 0, 0.0f);
+ this.scheduler = scheduler;
+ }
+
+ /**
+ * Get the current accumulated utilization. Currently it is the accumulation
+ * of the resources allocated to all of the containers.
+ * @return ResourceUtilization Resource Utilization.
+ */
+ public ResourceUtilization getCurrentUtilization() {
+ return this.containersAllocation;
+ }
+
+ /**
+ * Add Container's resources to the accumulated Utilization.
+ * @param container Container.
+ */
+ public void addContainerResources(Container container) {
+ increaseResourceUtilization(
+ getContainersMonitor(), this.containersAllocation,
+ container.getResource());
+ }
+
+ /**
+ * Subtract Container's resources from the accumulated Utilization.
+ * @param container Container.
+ */
+ public void subtractContainerResource(Container container) {
+ decreaseResourceUtilization(
+ getContainersMonitor(), this.containersAllocation,
+ container.getResource());
+ }
+
+ /**
+ * Check if NM has resources available currently to run the container.
+ * @param container Container.
+ * @return True, if NM has resources available currently to run the container.
+ */
+ public boolean hasResourcesAvailable(Container container) {
+ long pMemBytes = container.getResource().getMemorySize() * 1024 * 1024L;
+ return hasResourcesAvailable(pMemBytes,
+ (long) (getContainersMonitor().getVmemRatio()* pMemBytes),
+ container.getResource().getVirtualCores());
+ }
+
+ private boolean hasResourcesAvailable(long pMemBytes, long vMemBytes,
+ int cpuVcores) {
+ // Check physical memory.
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("pMemCheck [current={} + asked={} > allowed={}]",
+ this.containersAllocation.getPhysicalMemory(),
+ (pMemBytes >> 20),
+ (getContainersMonitor().getPmemAllocatedForContainers() >> 20));
+ }
+ if (this.containersAllocation.getPhysicalMemory() +
+ (int) (pMemBytes >> 20) >
+ (int) (getContainersMonitor()
+ .getPmemAllocatedForContainers() >> 20)) {
+ return false;
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("before vMemCheck" +
+ "[isEnabled={}, current={} + asked={} > allowed={}]",
+ getContainersMonitor().isVmemCheckEnabled(),
+ this.containersAllocation.getVirtualMemory(), (vMemBytes >> 20),
+ (getContainersMonitor().getVmemAllocatedForContainers() >> 20));
+ }
+ // Check virtual memory.
+ if (getContainersMonitor().isVmemCheckEnabled() &&
+ this.containersAllocation.getVirtualMemory() +
+ (int) (vMemBytes >> 20) >
+ (int) (getContainersMonitor()
+ .getVmemAllocatedForContainers() >> 20)) {
+ return false;
+ }
+
+ float vCores = (float) cpuVcores /
+ getContainersMonitor().getVCoresAllocatedForContainers();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("before cpuCheck [asked={} > allowed={}]",
+ this.containersAllocation.getCPU(), vCores);
+ }
+ // Check CPU.
+ if (this.containersAllocation.getCPU() + vCores > 1.0f) {
+ return false;
+ }
+ return true;
+ }
+
+ public ContainersMonitor getContainersMonitor() {
+ return this.scheduler.getContainersMonitor();
+ }
+
+ public static void increaseResourceUtilization(
+ ContainersMonitor containersMonitor, ResourceUtilization resourceAlloc,
+ Resource resource) {
+ float vCores = (float) resource.getVirtualCores() /
+ containersMonitor.getVCoresAllocatedForContainers();
+ int vmem = (int) (resource.getMemorySize()
+ * containersMonitor.getVmemRatio());
+ resourceAlloc.addTo((int)resource.getMemorySize(), vmem, vCores);
+ }
+
+ public static void decreaseResourceUtilization(
+ ContainersMonitor containersMonitor, ResourceUtilization resourceAlloc,
+ Resource resource) {
+ float vCores = (float) resource.getVirtualCores() /
+ containersMonitor.getVCoresAllocatedForContainers();
+ int vmem = (int) (resource.getMemorySize()
+ * containersMonitor.getVmemRatio());
+ resourceAlloc.subtractFrom((int)resource.getMemorySize(), vmem, vCores);
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
index 35e7593..e1a9995 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java
@@ -21,7 +21,6 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
-import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
@@ -38,7 +37,7 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
-import org.apache.hadoop.yarn.util.ConverterUtils;
+
import org.apache.hadoop.yarn.webapp.NotFoundException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -149,7 +148,7 @@ private static void checkAccess(String remoteUser, Application application,
private static void checkState(ContainerState state) {
if (state == ContainerState.NEW || state == ContainerState.LOCALIZING ||
- state == ContainerState.LOCALIZED) {
+ state == ContainerState.SCHEDULED) {
throw new NotFoundException("Container is not yet running. Current state is "
+ state);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
index 3b84a78..7ca286d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
@@ -158,7 +158,7 @@ public long getRMIdentifier() {
containerManager.startContainers(allRequests);
BaseContainerManagerTest.waitForContainerState(containerManager, cID,
- ContainerState.RUNNING);
+ ContainerState.RUNNING, ContainerState.SCHEDULED);
List containerIds = new ArrayList();
containerIds.add(cID);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
index f6593f9..04cfae9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
@@ -454,6 +454,14 @@ protected void rebootNodeStatusUpdaterAndRegisterWithRM() {
if (containersShouldBePreserved) {
Assert.assertFalse(containers.isEmpty());
Assert.assertTrue(containers.containsKey(existingCid));
+ // Re-poll the container state until it reaches RUNNING, since the
+ // transition happens asynchronously after resync (wait up to ~5s).
+ int counter = 50;
+ while (containers.get(existingCid).cloneAndGetContainerStatus()
+ .getState() != ContainerState.RUNNING && counter > 0) {
+ Thread.sleep(100);
+ counter--;
+ }
Assert.assertEquals(ContainerState.RUNNING,
containers.get(existingCid)
.cloneAndGetContainerStatus().getState());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
index b3ad318..03e06d2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
@@ -28,6 +28,7 @@
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -255,7 +256,9 @@ public ContainerManagementProtocol run() {
GetContainerStatusesRequest.newInstance(containerIds);
ContainerStatus containerStatus =
containerManager.getContainerStatuses(request).getContainerStatuses().get(0);
- Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState());
+ Assert.assertTrue(
+ EnumSet.of(ContainerState.RUNNING, ContainerState.SCHEDULED)
+ .contains(containerStatus.getState()));
}
public static ContainerId createContainerId() {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index 977cb76..a3cde57 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -65,7 +65,6 @@
import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState;
@@ -1080,128 +1079,6 @@ public ContainerState getCurrentState() {
Assert.assertTrue(containerIdSet.contains(runningContainerId));
}
- @Test(timeout = 90000)
- public void testKilledQueuedContainers() throws Exception {
- NodeManager nm = new NodeManager();
- YarnConfiguration conf = new YarnConfiguration();
- conf.set(
- NodeStatusUpdaterImpl
- .YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS,
- "10000");
- nm.init(conf);
- NodeStatusUpdaterImpl nodeStatusUpdater =
- (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater();
- ApplicationId appId = ApplicationId.newInstance(0, 0);
- ApplicationAttemptId appAttemptId =
- ApplicationAttemptId.newInstance(appId, 0);
-
- // Add application to context.
- nm.getNMContext().getApplications().putIfAbsent(appId,
- mock(Application.class));
-
- // Create a running container and add it to the context.
- ContainerId runningContainerId =
- ContainerId.newContainerId(appAttemptId, 1);
- Token runningContainerToken =
- BuilderUtils.newContainerToken(runningContainerId, 0, "anyHost",
- 1234, "anyUser", BuilderUtils.newResource(1024, 1), 0, 123,
- "password".getBytes(), 0);
- Container runningContainer =
- new ContainerImpl(conf, null, null, null, null,
- BuilderUtils.newContainerTokenIdentifier(runningContainerToken),
- nm.getNMContext()) {
- @Override
- public ContainerState getCurrentState() {
- return ContainerState.RUNNING;
- }
-
- @Override
- public org.apache.hadoop.yarn.server.nodemanager.containermanager.
- container.ContainerState getContainerState() {
- return org.apache.hadoop.yarn.server.nodemanager.containermanager.
- container.ContainerState.RUNNING;
- }
- };
-
- nm.getNMContext().getContainers()
- .put(runningContainerId, runningContainer);
-
- // Create two killed queued containers and add them to the queuing context.
- ContainerId killedQueuedContainerId1 = ContainerId.newContainerId(
- appAttemptId, 2);
- ContainerTokenIdentifier killedQueuedContainerTokenId1 = BuilderUtils
- .newContainerTokenIdentifier(BuilderUtils.newContainerToken(
- killedQueuedContainerId1, 0, "anyHost", 1234, "anyUser",
- BuilderUtils.newResource(1024, 1), 0, 123,
- "password".getBytes(), 0));
- ContainerId killedQueuedContainerId2 = ContainerId.newContainerId(
- appAttemptId, 3);
- ContainerTokenIdentifier killedQueuedContainerTokenId2 = BuilderUtils
- .newContainerTokenIdentifier(BuilderUtils.newContainerToken(
- killedQueuedContainerId2, 0, "anyHost", 1234, "anyUser",
- BuilderUtils.newResource(1024, 1), 0, 123,
- "password".getBytes(), 0));
-
- nm.getNMContext().getQueuingContext().getKilledQueuedContainers().put(
- killedQueuedContainerTokenId1, "Queued container killed.");
- nm.getNMContext().getQueuingContext().getKilledQueuedContainers().put(
- killedQueuedContainerTokenId2, "Queued container killed.");
-
- List containerStatuses = nodeStatusUpdater
- .getContainerStatuses();
-
- Assert.assertEquals(3, containerStatuses.size());
-
- ContainerStatus runningContainerStatus = null;
- ContainerStatus killedQueuedContainerStatus1 = null;
- ContainerStatus killedQueuedContainerStatus2 = null;
- for (ContainerStatus cStatus : containerStatuses) {
- if (ContainerState.RUNNING == cStatus.getState()) {
- runningContainerStatus = cStatus;
- }
- if (ContainerState.COMPLETE == cStatus.getState()) {
- if (killedQueuedContainerId1.equals(cStatus.getContainerId())) {
- killedQueuedContainerStatus1 = cStatus;
- } else {
- killedQueuedContainerStatus2 = cStatus;
- }
- }
- }
-
- // Check container IDs and Container Status.
- Assert.assertNotNull(runningContainerId);
- Assert.assertNotNull(killedQueuedContainerId1);
- Assert.assertNotNull(killedQueuedContainerId2);
-
- // Killed queued container should have ABORTED exit status.
- Assert.assertEquals(ContainerExitStatus.ABORTED,
- killedQueuedContainerStatus1.getExitStatus());
- Assert.assertEquals(ContainerExitStatus.ABORTED,
- killedQueuedContainerStatus2.getExitStatus());
-
- // Killed queued container should appear in the recentlyStoppedContainers.
- Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(
- killedQueuedContainerId1));
- Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(
- killedQueuedContainerId2));
-
- // Check if killed queued containers are successfully removed from the
- // queuing context.
- List ackedContainers = new ArrayList();
- ackedContainers.add(killedQueuedContainerId1);
- ackedContainers.add(killedQueuedContainerId2);
-
- nodeStatusUpdater.removeOrTrackCompletedContainersFromContext(
- ackedContainers);
-
- containerStatuses = nodeStatusUpdater.getContainerStatuses();
-
- // Only the running container should be in the container statuses now.
- Assert.assertEquals(1, containerStatuses.size());
- Assert.assertEquals(ContainerState.RUNNING,
- containerStatuses.get(0).getState());
- }
-
@Test(timeout = 10000)
public void testCompletedContainersIsRecentlyStopped() throws Exception {
NodeManager nm = new NodeManager();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
index 031300f..7f96947 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
@@ -67,6 +67,7 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator;
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
@@ -153,7 +154,7 @@ protected MockAMRMProxyService createAndStartAMRMProxyService() {
* rest. So the responses returned can be less than the number of end points
* specified
*
- * @param testContext
+ * @param testContexts
* @param func
* @return
*/
@@ -697,11 +698,6 @@ public NodeStatusUpdater getNodeStatusUpdater() {
return null;
}
- @Override
- public QueuingContext getQueuingContext() {
- return null;
- }
-
public boolean isDistributedSchedulingEnabled() {
return false;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
index d359c3d..6695889 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
@@ -24,6 +24,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -192,10 +193,10 @@ public void setup() throws IOException {
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
// Default delSrvc
+ exec = createContainerExecutor();
delSrvc = createDeletionService();
delSrvc.init(conf);
- exec = createContainerExecutor();
dirsHandler = new LocalDirsHandlerService();
nodeHealthChecker = new NodeHealthCheckerService(
NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
@@ -284,34 +285,38 @@ public void tearDown() throws IOException, InterruptedException {
.build());
}
- public static void waitForContainerState(ContainerManagementProtocol containerManager,
- ContainerId containerID, ContainerState finalState)
- throws InterruptedException, YarnException, IOException {
- waitForContainerState(containerManager, containerID, finalState, 20);
+ public static void waitForContainerState(
+ ContainerManagementProtocol containerManager, ContainerId containerID,
+ ContainerState... finalStates) throws InterruptedException, YarnException,
+ IOException {
+ waitForContainerState(containerManager, containerID, 20, finalStates);
}
- public static void waitForContainerState(ContainerManagementProtocol containerManager,
- ContainerId containerID, ContainerState finalState, int timeOutMax)
- throws InterruptedException, YarnException, IOException {
+ public static void waitForContainerState(
+ ContainerManagementProtocol containerManager, ContainerId containerID,
+ int timeOutMax, ContainerState... finalStates)
+ throws InterruptedException, YarnException, IOException {
List list = new ArrayList();
list.add(containerID);
GetContainerStatusesRequest request =
GetContainerStatusesRequest.newInstance(list);
ContainerStatus containerStatus = null;
+ HashSet fStates =
+ new HashSet<>(Arrays.asList(finalStates));
int timeoutSecs = 0;
do {
Thread.sleep(2000);
containerStatus =
containerManager.getContainerStatuses(request)
.getContainerStatuses().get(0);
- LOG.info("Waiting for container to get into state " + finalState
+ LOG.info("Waiting for container to get into one of states " + fStates
+ ". Current state is " + containerStatus.getState());
timeoutSecs += 2;
- } while (!containerStatus.getState().equals(finalState)
+ } while (!fStates.contains(containerStatus.getState())
&& timeoutSecs < timeOutMax);
LOG.info("Container state is " + containerStatus.getState());
- Assert.assertEquals("ContainerState is not correct (timedout)",
- finalState, containerStatus.getState());
+ Assert.assertTrue("ContainerState is not correct (timedout)",
+ fStates.contains(containerStatus.getState()));
}
static void waitForApplicationState(ContainerManagerImpl containerManager,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
index 0c083f2..a245799 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
@@ -232,7 +232,7 @@ public void testContainerSetup() throws Exception {
containerManager.startContainers(allRequests);
BaseContainerManagerTest.waitForContainerState(containerManager, cId,
- ContainerState.COMPLETE, 40);
+ 40, ContainerState.COMPLETE);
// Now ascertain that the resources are localised correctly.
ApplicationId appId = cId.getApplicationAttemptId().getApplicationId();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
index 32dddae..fd5fdf4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
@@ -94,6 +94,9 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
@@ -101,7 +104,6 @@
import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
-import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.Before;
import org.junit.Test;
@@ -551,6 +553,35 @@ protected void authorizeGetAndStopContainerRequest(
throw new YarnException("Reject this container");
}
}
+ @Override
+ protected ContainerScheduler createContainerScheduler(Context context) {
+ return new ContainerScheduler(context){
+ @Override
+ public ContainersMonitor getContainersMonitor() {
+ return new ContainersMonitorImpl(null, null, null) {
+ @Override
+ public float getVmemRatio() {
+ return 2.0f;
+ }
+
+ @Override
+ public long getVmemAllocatedForContainers() {
+ return 20480;
+ }
+
+ @Override
+ public long getPmemAllocatedForContainers() {
+ return 10240;
+ }
+
+ @Override
+ public long getVCoresAllocatedForContainers() {
+ return 4;
+ }
+ };
+ }
+ };
+ }
};
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRegression.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRegression.java
deleted file mode 100644
index 71af76f..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRegression.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.yarn.server.nodemanager.containermanager;
-
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.UnsupportedFileSystemException;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.security.NMTokenIdentifier;
-import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing.QueuingContainerManagerImpl;
-
-/**
- * Test class that invokes all test cases of {@link TestContainerManager} while
- * using the {@link QueuingContainerManagerImpl}. The goal is to assert that
- * no regression is introduced in the existing cases when no queuing of tasks at
- * the NMs is involved.
- */
-public class TestContainerManagerRegression extends TestContainerManager {
-
- public TestContainerManagerRegression()
- throws UnsupportedFileSystemException {
- super();
- }
-
- static {
- LOG = LogFactory.getLog(TestContainerManagerRegression.class);
- }
-
- @Override
- protected ContainerManagerImpl createContainerManager(
- DeletionService delSrvc) {
- return new QueuingContainerManagerImpl(context, exec, delSrvc,
- nodeStatusUpdater, metrics, dirsHandler) {
- @Override
- public void
- setBlockNewContainerRequests(boolean blockNewContainerRequests) {
- // do nothing
- }
-
- @Override
- protected UserGroupInformation getRemoteUgi() throws YarnException {
- ApplicationId appId = ApplicationId.newInstance(0, 0);
- ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(
- appId, 1);
- UserGroupInformation ugi = UserGroupInformation.createRemoteUser(
- appAttemptId.toString());
- ugi.addTokenIdentifier(new NMTokenIdentifier(appAttemptId, context
- .getNodeId(), user, context.getNMTokenSecretManager()
- .getCurrentKey().getKeyId()));
- return ugi;
- }
-
- @Override
- protected void authorizeGetAndStopContainerRequest(
- ContainerId containerId, Container container, boolean stopRequest,
- NMTokenIdentifier identifier) throws YarnException {
- if (container == null || container.getUser().equals("Fail")) {
- throw new YarnException("Reject this container");
- }
- }
- };
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
index 766a1f9..eed3f9d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
@@ -27,6 +27,7 @@
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import static org.mockito.Mockito.atLeastOnce;
import java.io.IOException;
import java.net.URISyntaxException;
@@ -90,6 +91,11 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
+
+
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
@@ -143,7 +149,7 @@ public void testLocalizationLaunch() throws Exception {
Map> localPaths = wc.localizeResources();
// all resources should be localized
- assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState());
+ assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState());
assertNotNull(wc.c.getLocalizedResources());
for (Entry> loc : wc.c.getLocalizedResources()
.entrySet()) {
@@ -421,7 +427,7 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerKilled()
wc = new WrappedContainer(17, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
- assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState());
+ assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState());
ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId());
wc.killContainer();
assertEquals(ContainerState.KILLING, wc.c.getContainerState());
@@ -452,7 +458,7 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerSuccess()
wc = new WrappedContainer(17, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
- assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState());
+ assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState());
wc.killContainer();
assertEquals(ContainerState.KILLING, wc.c.getContainerState());
wc.containerSuccessful();
@@ -480,7 +486,7 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerFailure()
wc = new WrappedContainer(17, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
- assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState());
+ assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState());
wc.killContainer();
assertEquals(ContainerState.KILLING, wc.c.getContainerState());
wc.containerFailed(ExitCode.FORCE_KILLED.getExitCode());
@@ -507,7 +513,7 @@ public void testKillOnLocalizedWhenContainerLaunched() throws Exception {
wc = new WrappedContainer(17, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
- assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState());
+ assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState());
ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId());
launcher.call();
wc.drainDispatcherEvents();
@@ -764,7 +770,7 @@ private void verifyCleanupCall(WrappedContainer wc) throws Exception {
new ResourcesReleasedMatcher(wc.localResources, EnumSet.of(
LocalResourceVisibility.PUBLIC, LocalResourceVisibility.PRIVATE,
LocalResourceVisibility.APPLICATION));
- verify(wc.localizerBus).handle(argThat(matchesReq));
+ verify(wc.localizerBus, atLeastOnce()).handle(argThat(matchesReq));
}
private void verifyOutofBandHeartBeat(WrappedContainer wc) {
@@ -890,6 +896,7 @@ public boolean matches(Object o) {
final EventHandler auxBus;
final EventHandler appBus;
final EventHandler LogBus;
+ final EventHandler schedBus;
final ContainersLauncher launcher;
final ContainerLaunchContext ctxt;
@@ -927,9 +934,16 @@ public boolean matches(Object o) {
auxBus = mock(EventHandler.class);
appBus = mock(EventHandler.class);
LogBus = mock(EventHandler.class);
+ schedBus = new ContainerScheduler(context, 0) {
+ @Override
+ protected void scheduleContainer(Container container) {
+ container.sendLaunchEvent();
+ }
+ };
dispatcher.register(LocalizationEventType.class, localizerBus);
dispatcher.register(ContainersLauncherEventType.class, launcherBus);
dispatcher.register(ContainersMonitorEventType.class, monitorBus);
+ dispatcher.register(ContainerSchedulerEventType.class, schedBus);
dispatcher.register(AuxServicesEventType.class, auxBus);
dispatcher.register(ApplicationEventType.class, appBus);
dispatcher.register(LogHandlerEventType.class, LogBus);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
index 0f1c6f5..6a6e106 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java
@@ -303,7 +303,7 @@ public void testContainerKillOnMemoryOverflow() throws IOException,
Assert.assertEquals(null, reader.readLine());
BaseContainerManagerTest.waitForContainerState(containerManager, cId,
- ContainerState.COMPLETE, 60);
+ 60, ContainerState.COMPLETE);
List containerIds = new ArrayList();
containerIds.add(cId);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/TestQueuingContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/TestQueuingContainerManager.java
deleted file mode 100644
index caebef7..0000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/TestQueuingContainerManager.java
+++ /dev/null
@@ -1,594 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.UnsupportedFileSystemException;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
-import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
-import org.apache.hadoop.yarn.api.records.ContainerStatus;
-import org.apache.hadoop.yarn.api.records.ExecutionType;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.security.NMTokenIdentifier;
-import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
-import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
-import org.apache.hadoop.yarn.server.utils.BuilderUtils;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Class for testing the {@link QueuingContainerManagerImpl}.
- */
-public class TestQueuingContainerManager extends BaseContainerManagerTest {
- public TestQueuingContainerManager() throws UnsupportedFileSystemException {
- super();
- }
-
- static {
- LOG = LogFactory.getLog(TestQueuingContainerManager.class);
- }
-
- boolean shouldDeleteWait = false;
-
- @Override
- protected ContainerManagerImpl createContainerManager(
- DeletionService delSrvc) {
- return new QueuingContainerManagerImpl(context, exec, delSrvc,
- nodeStatusUpdater, metrics, dirsHandler) {
- @Override
- public void
- setBlockNewContainerRequests(boolean blockNewContainerRequests) {
- // do nothing
- }
-
- @Override
- protected UserGroupInformation getRemoteUgi() throws YarnException {
- ApplicationId appId = ApplicationId.newInstance(0, 0);
- ApplicationAttemptId appAttemptId =
- ApplicationAttemptId.newInstance(appId, 1);
- UserGroupInformation ugi =
- UserGroupInformation.createRemoteUser(appAttemptId.toString());
- ugi.addTokenIdentifier(new NMTokenIdentifier(appAttemptId, context
- .getNodeId(), user, context.getNMTokenSecretManager().getCurrentKey()
- .getKeyId()));
- return ugi;
- }
-
- @Override
- protected ContainersMonitor createContainersMonitor(
- ContainerExecutor exec) {
- return new ContainersMonitorImpl(exec, dispatcher, this.context) {
- // Define resources available for containers to be executed.
- @Override
- public long getPmemAllocatedForContainers() {
- return 2048 * 1024 * 1024L;
- }
-
- @Override
- public long getVmemAllocatedForContainers() {
- float pmemRatio = getConfig().getFloat(
- YarnConfiguration.NM_VMEM_PMEM_RATIO,
- YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
- return (long) (pmemRatio * getPmemAllocatedForContainers());
- }
-
- @Override
- public long getVCoresAllocatedForContainers() {
- return 4;
- }
- };
- }
- };
- }
-
- @Override
- protected DeletionService createDeletionService() {
- return new DeletionService(exec) {
- @Override
- public void delete(String user, Path subDir, Path... baseDirs) {
- // Don't do any deletions.
- if (shouldDeleteWait) {
- try {
- Thread.sleep(10000);
- LOG.info("\n\nSleeping Pseudo delete : user - " + user + ", " +
- "subDir - " + subDir + ", " +
- "baseDirs - " + Arrays.asList(baseDirs));
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- } else {
- LOG.info("\n\nPseudo delete : user - " + user + ", " +
- "subDir - " + subDir + ", " +
- "baseDirs - " + Arrays.asList(baseDirs));
- }
- }
- };
- }
-
- @Override
- public void setup() throws IOException {
- super.setup();
- shouldDeleteWait = false;
- }
-
- /**
- * Starting one GUARANTEED and one OPPORTUNISTIC container.
- * @throws Exception
- */
- @Test
- public void testStartMultipleContainers() throws Exception {
- shouldDeleteWait = true;
- containerManager.start();
-
- ContainerLaunchContext containerLaunchContext =
- recordFactory.newRecordInstance(ContainerLaunchContext.class);
-
- List list = new ArrayList<>();
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(1024, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.GUARANTEED)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(1024, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
-
- StartContainersRequest allRequests =
- StartContainersRequest.newInstance(list);
- containerManager.startContainers(allRequests);
-
- BaseContainerManagerTest.waitForContainerState(containerManager,
- createContainerId(0),
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING);
- BaseContainerManagerTest.waitForContainerState(containerManager,
- createContainerId(1),
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING);
-
- // Ensure all containers are running.
- List statList = new ArrayList();
- for (int i = 0; i < 2; i++) {
- statList.add(createContainerId(i));
- }
- GetContainerStatusesRequest statRequest =
- GetContainerStatusesRequest.newInstance(statList);
- List containerStatuses = containerManager
- .getContainerStatuses(statRequest).getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
- status.getState());
- }
- }
-
- /**
- * Submit both a GUARANTEED and an OPPORTUNISTIC container, each of which
- * requires more resources than available at the node, and make sure they
- * are both queued.
- * @throws Exception
- */
- @Test
- public void testQueueMultipleContainers() throws Exception {
- shouldDeleteWait = true;
- containerManager.start();
-
- ContainerLaunchContext containerLaunchContext =
- recordFactory.newRecordInstance(ContainerLaunchContext.class);
-
- List list = new ArrayList<>();
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(3072, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.GUARANTEED)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(3072, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
-
- StartContainersRequest allRequests =
- StartContainersRequest.newInstance(list);
- containerManager.startContainers(allRequests);
-
- Thread.sleep(5000);
-
- // Ensure both containers are queued.
- List statList = new ArrayList();
- for (int i = 0; i < 2; i++) {
- statList.add(createContainerId(i));
- }
- GetContainerStatusesRequest statRequest =
- GetContainerStatusesRequest.newInstance(statList);
- List containerStatuses = containerManager
- .getContainerStatuses(statRequest).getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.QUEUED,
- status.getState());
- }
-
- // Ensure both containers are properly queued.
- Assert.assertEquals(2, containerManager.getContext().getQueuingContext()
- .getQueuedContainers().size());
- Assert.assertEquals(1, ((QueuingContainerManagerImpl) containerManager)
- .getNumQueuedGuaranteedContainers());
- Assert.assertEquals(1, ((QueuingContainerManagerImpl) containerManager)
- .getNumQueuedOpportunisticContainers());
- }
-
- /**
- * Starts one OPPORTUNISTIC container that takes up the whole node's
- * resources, and submit two more that will be queued.
- * @throws Exception
- */
- @Test
- public void testStartAndQueueMultipleContainers() throws Exception {
- shouldDeleteWait = true;
- containerManager.start();
-
- ContainerLaunchContext containerLaunchContext =
- recordFactory.newRecordInstance(ContainerLaunchContext.class);
-
- List list = new ArrayList<>();
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(2048, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(1024, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(1024, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
-
- StartContainersRequest allRequests =
- StartContainersRequest.newInstance(list);
- containerManager.startContainers(allRequests);
-
- Thread.sleep(5000);
-
- // Ensure first container is running and others are queued.
- List statList = new ArrayList();
- for (int i = 0; i < 3; i++) {
- statList.add(createContainerId(i));
- }
- GetContainerStatusesRequest statRequest = GetContainerStatusesRequest
- .newInstance(Arrays.asList(createContainerId(0)));
- List containerStatuses = containerManager
- .getContainerStatuses(statRequest).getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- if (status.getContainerId().equals(createContainerId(0))) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
- status.getState());
- } else {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.QUEUED,
- status.getState());
- }
- }
-
- // Ensure two containers are properly queued.
- Assert.assertEquals(2, containerManager.getContext().getQueuingContext()
- .getQueuedContainers().size());
- Assert.assertEquals(0, ((QueuingContainerManagerImpl) containerManager)
- .getNumQueuedGuaranteedContainers());
- Assert.assertEquals(2, ((QueuingContainerManagerImpl) containerManager)
- .getNumQueuedOpportunisticContainers());
- }
-
- /**
- * Submit two OPPORTUNISTIC and one GUARANTEED containers. The resources
- * requests by each container as such that only one can run in parallel.
- * Thus, the OPPORTUNISTIC container that started running, will be
- * killed for the GUARANTEED container to start.
- * Once the GUARANTEED container finishes its execution, the remaining
- * OPPORTUNISTIC container will be executed.
- * @throws Exception
- */
- @Test
- public void testKillOpportunisticForGuaranteedContainer() throws Exception {
- shouldDeleteWait = true;
- containerManager.start();
-
- ContainerLaunchContext containerLaunchContext =
- recordFactory.newRecordInstance(ContainerLaunchContext.class);
-
- List list = new ArrayList<>();
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(2048, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(2048, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(2048, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.GUARANTEED)));
-
- StartContainersRequest allRequests =
- StartContainersRequest.newInstance(list);
- containerManager.startContainers(allRequests);
-
- BaseContainerManagerTest.waitForNMContainerState(containerManager,
- createContainerId(0), ContainerState.DONE, 40);
- Thread.sleep(5000);
-
- // Get container statuses. Container 0 should be killed, container 1
- // should be queued and container 2 should be running.
- List statList = new ArrayList();
- for (int i = 0; i < 3; i++) {
- statList.add(createContainerId(i));
- }
- GetContainerStatusesRequest statRequest =
- GetContainerStatusesRequest.newInstance(statList);
- List containerStatuses = containerManager
- .getContainerStatuses(statRequest).getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- if (status.getContainerId().equals(createContainerId(0))) {
- Assert.assertTrue(status.getDiagnostics()
- .contains("Container killed by the ApplicationMaster"));
- } else if (status.getContainerId().equals(createContainerId(1))) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.QUEUED,
- status.getState());
- } else if (status.getContainerId().equals(createContainerId(2))) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
- status.getState());
- }
- System.out.println("\nStatus : [" + status + "]\n");
- }
-
- // Make sure the remaining OPPORTUNISTIC container starts its execution.
- BaseContainerManagerTest.waitForNMContainerState(containerManager,
- createContainerId(2), ContainerState.DONE, 40);
- Thread.sleep(5000);
- statRequest = GetContainerStatusesRequest.newInstance(Arrays.asList(
- createContainerId(1)));
- ContainerStatus contStatus1 = containerManager.getContainerStatuses(
- statRequest).getContainerStatuses().get(0);
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
- contStatus1.getState());
- }
-
- /**
- * Submit three OPPORTUNISTIC containers that can run concurrently, and one
- * GUARANTEED that needs to kill two of the OPPORTUNISTIC for it to run.
- * @throws Exception
- */
- @Test
- public void testKillMultipleOpportunisticContainers() throws Exception {
- shouldDeleteWait = true;
- containerManager.start();
-
- ContainerLaunchContext containerLaunchContext =
- recordFactory.newRecordInstance(ContainerLaunchContext.class);
-
- List list = new ArrayList<>();
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(512, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(512, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(512, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(3), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(1500, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.GUARANTEED)));
-
- StartContainersRequest allRequests =
- StartContainersRequest.newInstance(list);
- containerManager.startContainers(allRequests);
-
- BaseContainerManagerTest.waitForNMContainerState(containerManager,
- createContainerId(0), ContainerState.DONE, 40);
- Thread.sleep(5000);
-
- // Get container statuses. Container 0 should be killed, container 1
- // should be queued and container 2 should be running.
- int killedContainers = 0;
- List statList = new ArrayList();
- for (int i = 0; i < 4; i++) {
- statList.add(createContainerId(i));
- }
- GetContainerStatusesRequest statRequest =
- GetContainerStatusesRequest.newInstance(statList);
- List containerStatuses = containerManager
- .getContainerStatuses(statRequest).getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- if (status.getDiagnostics().contains(
- "Container killed by the ApplicationMaster")) {
- killedContainers++;
- }
- System.out.println("\nStatus : [" + status + "]\n");
- }
-
- Assert.assertEquals(2, killedContainers);
- }
-
- /**
- * Start running one GUARANTEED container and queue two OPPORTUNISTIC ones.
- * Try killing one of the two queued containers.
- * @throws Exception
- */
- @Test
- public void testStopQueuedContainer() throws Exception {
- shouldDeleteWait = true;
- containerManager.start();
-
- ContainerLaunchContext containerLaunchContext =
- recordFactory.newRecordInstance(ContainerLaunchContext.class);
-
- List list = new ArrayList<>();
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(2048, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.GUARANTEED)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(512, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
- list.add(StartContainerRequest.newInstance(
- containerLaunchContext,
- createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
- context.getNodeId(),
- user, BuilderUtils.newResource(512, 1),
- context.getContainerTokenSecretManager(), null,
- ExecutionType.OPPORTUNISTIC)));
-
- StartContainersRequest allRequests =
- StartContainersRequest.newInstance(list);
- containerManager.startContainers(allRequests);
-
- Thread.sleep(2000);
-
- // Assert there is initially one container running and two queued.
- int runningContainersNo = 0;
- int queuedContainersNo = 0;
- List statList = new ArrayList();
- for (int i = 0; i < 3; i++) {
- statList.add(createContainerId(i));
- }
- GetContainerStatusesRequest statRequest = GetContainerStatusesRequest
- .newInstance(statList);
- List containerStatuses = containerManager
- .getContainerStatuses(statRequest).getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- if (status.getState() ==
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) {
- runningContainersNo++;
- } else if (status.getState() ==
- org.apache.hadoop.yarn.api.records.ContainerState.QUEUED) {
- queuedContainersNo++;
- }
- System.out.println("\nStatus : [" + status + "]\n");
- }
-
- Assert.assertEquals(1, runningContainersNo);
- Assert.assertEquals(2, queuedContainersNo);
-
- // Stop one of the two queued containers.
- StopContainersRequest stopRequest = StopContainersRequest.
- newInstance(Arrays.asList(createContainerId(1)));
- containerManager.stopContainers(stopRequest);
-
- Thread.sleep(2000);
-
- // Assert queued container got properly stopped.
- statList.clear();
- for (int i = 0; i < 3; i++) {
- statList.add(createContainerId(i));
- }
- statRequest = GetContainerStatusesRequest.newInstance(statList);
- containerStatuses = containerManager.getContainerStatuses(statRequest)
- .getContainerStatuses();
- for (ContainerStatus status : containerStatuses) {
- if (status.getContainerId().equals(createContainerId(0))) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
- status.getState());
- } else if (status.getContainerId().equals(createContainerId(1))) {
- Assert.assertTrue(status.getDiagnostics().contains(
- "Queued container request removed"));
- } else if (status.getContainerId().equals(createContainerId(2))) {
- Assert.assertEquals(
- org.apache.hadoop.yarn.api.records.ContainerState.QUEUED,
- status.getState());
- }
- System.out.println("\nStatus : [" + status + "]\n");
- }
- }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java
new file mode 100644
index 0000000..07ce60c
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java
@@ -0,0 +1,678 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.security.NMTokenIdentifier;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.mockito.Mockito.spy;
+
+/**
+ * Tests to verify that the {@link ContainerScheduler} is able to queue and
+ * make room for containers.
+ */
+public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
+  public TestContainerSchedulerQueuing() throws UnsupportedFileSystemException {
+    super();
+  }
+
+  // Route base-class logging to this test's logger.
+  static {
+    LOG = LogFactory.getLog(TestContainerSchedulerQueuing.class);
+  }
+
+  // While true, launched containers sleep 10s (see createContainerExecutor)
+  // so tests can observe RUNNING/queued states before containers finish.
+  private boolean delayContainers = true;
+
+  /**
+   * Builds a ContainerManager whose security and resource views are faked so
+   * the test runs without a real RM: requests are never blocked, the remote
+   * UGI carries a valid NMToken, and the monitor advertises fixed resources.
+   */
+  @Override
+  protected ContainerManagerImpl createContainerManager(
+      DeletionService delSrvc) {
+    return new ContainerManagerImpl(context, exec, delSrvc,
+        nodeStatusUpdater, metrics, dirsHandler) {
+      // Tests talk to the NM directly; never reject new container requests.
+      @Override
+      public void
+          setBlockNewContainerRequests(boolean blockNewContainerRequests) {
+        // do nothing
+      }
+
+      // Fabricate a caller identity holding a current NMToken so container
+      // RPCs pass the NM's authorization checks.
+      @Override
+      protected UserGroupInformation getRemoteUgi() throws YarnException {
+        ApplicationId appId = ApplicationId.newInstance(0, 0);
+        ApplicationAttemptId appAttemptId =
+            ApplicationAttemptId.newInstance(appId, 1);
+        UserGroupInformation ugi =
+            UserGroupInformation.createRemoteUser(appAttemptId.toString());
+        ugi.addTokenIdentifier(new NMTokenIdentifier(appAttemptId, context
+            .getNodeId(), user, context.getNMTokenSecretManager().getCurrentKey()
+            .getKeyId()));
+        return ugi;
+      }
+
+      @Override
+      protected ContainersMonitor createContainersMonitor(
+          ContainerExecutor exec) {
+        return new ContainersMonitorImpl(exec, dispatcher, this.context) {
+          // Define resources available for containers to be executed.
+          // 2048 MB pmem / 4 vcores: the tests' resource arithmetic (what
+          // fits, what queues, what gets killed) is derived from these.
+          @Override
+          public long getPmemAllocatedForContainers() {
+            return 2048 * 1024 * 1024L;
+          }
+
+          @Override
+          public long getVmemAllocatedForContainers() {
+            float pmemRatio = getConfig().getFloat(
+                YarnConfiguration.NM_VMEM_PMEM_RATIO,
+                YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
+            return (long) (pmemRatio * getPmemAllocatedForContainers());
+          }
+
+          @Override
+          public long getVCoresAllocatedForContainers() {
+            return 4;
+          }
+        };
+      }
+    };
+  }
+
+  /**
+   * Wraps the default executor so every container launch is delayed by 10s
+   * (while {@code delayContainers} is set), keeping containers in flight long
+   * enough for the tests to assert on RUNNING/SCHEDULED states.
+   */
+  @Override
+  protected ContainerExecutor createContainerExecutor() {
+    DefaultContainerExecutor exec = new DefaultContainerExecutor() {
+      @Override
+      public int launchContainer(ContainerStartContext ctx) throws IOException {
+        if (delayContainers) {
+          try {
+            Thread.sleep(10000);
+          } catch (InterruptedException e) {
+            // Nothing..
+          }
+        }
+        return super.launchContainer(ctx);
+      }
+    };
+    exec.setConf(conf);
+    // spy() lets individual tests verify/stub executor interactions.
+    return spy(exec);
+  }
+
+  @Override
+  public void setup() throws IOException {
+    // Allow up to 10 OPPORTUNISTIC containers to be queued at the NM;
+    // a value > 0 is what enables the ContainerScheduler's queuing path.
+    conf.setInt(
+        YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10);
+    super.setup();
+  }
+
+  /**
+   * Starting one GUARANTEED and one OPPORTUNISTIC container.
+   * Both fit in the node's advertised 2048 MB, so both must reach RUNNING.
+   * @throws Exception
+   */
+  @Test
+  public void testStartMultipleContainers() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    // Parameterized types restored; the raw List/ArrayList defeated
+    // compile-time type checking.
+    List<StartContainerRequest> list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(1024, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.GUARANTEED)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(1024, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    BaseContainerManagerTest.waitForContainerState(containerManager,
+        createContainerId(0),
+        org.apache.hadoop.yarn.api.records.ContainerState.RUNNING);
+    BaseContainerManagerTest.waitForContainerState(containerManager,
+        createContainerId(1),
+        org.apache.hadoop.yarn.api.records.ContainerState.RUNNING);
+
+    // Ensure all containers are running.
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 2; i++) {
+      statList.add(createContainerId(i));
+    }
+    GetContainerStatusesRequest statRequest =
+        GetContainerStatusesRequest.newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      Assert.assertEquals(
+          org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
+          status.getState());
+    }
+  }
+
+  /**
+   * Submit both a GUARANTEED and an OPPORTUNISTIC container, each of which
+   * requires more resources than available at the node, and make sure they
+   * are both queued.
+   * @throws Exception
+   */
+  @Test
+  public void testQueueMultipleContainers() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    // Each container asks for 3072 MB while the node only advertises
+    // 2048 MB, so neither can start and both must queue.
+    List<StartContainerRequest> list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(3072, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.GUARANTEED)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(3072, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    Thread.sleep(5000);
+
+    // Ensure both containers are queued.
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 2; i++) {
+      statList.add(createContainerId(i));
+    }
+    GetContainerStatusesRequest statRequest =
+        GetContainerStatusesRequest.newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      Assert.assertEquals(
+          org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED,
+          status.getState());
+    }
+
+    ContainerScheduler containerScheduler =
+        containerManager.getContainerScheduler();
+    // Ensure both containers are properly queued.
+    Assert.assertEquals(2, containerScheduler.getNumQueuedContainers());
+    Assert.assertEquals(1,
+        containerScheduler.getNumQueuedGuaranteedContainers());
+    Assert.assertEquals(1,
+        containerScheduler.getNumQueuedOpportunisticContainers());
+  }
+
+  /**
+   * Starts one OPPORTUNISTIC container that takes up the whole node's
+   * resources, and submit two more that will be queued.
+   * @throws Exception
+   */
+  @Test
+  public void testStartAndQueueMultipleContainers() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    List<StartContainerRequest> list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(2048, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(1024, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(1024, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    Thread.sleep(5000);
+
+    // Ensure first container is running and others are queued.
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 3; i++) {
+      statList.add(createContainerId(i));
+    }
+    // Bug fix: the original queried only container 0 (ignoring the
+    // statList it had just built), so the SCHEDULED branch below was dead
+    // and containers 1 and 2 were never checked.
+    GetContainerStatusesRequest statRequest = GetContainerStatusesRequest
+        .newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      if (status.getContainerId().equals(createContainerId(0))) {
+        Assert.assertEquals(
+            org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
+            status.getState());
+      } else {
+        Assert.assertEquals(
+            org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED,
+            status.getState());
+      }
+    }
+
+    ContainerScheduler containerScheduler =
+        containerManager.getContainerScheduler();
+    // Ensure two containers are properly queued.
+    Assert.assertEquals(2, containerScheduler.getNumQueuedContainers());
+    Assert.assertEquals(0,
+        containerScheduler.getNumQueuedGuaranteedContainers());
+    Assert.assertEquals(2,
+        containerScheduler.getNumQueuedOpportunisticContainers());
+  }
+
+  /**
+   * Submit two OPPORTUNISTIC and one GUARANTEED containers. The resource
+   * request of each container is such that only one can run in parallel.
+   * Thus, the OPPORTUNISTIC container that started running, will be
+   * killed for the GUARANTEED container to start.
+   * Once the GUARANTEED container finishes its execution, the remaining
+   * OPPORTUNISTIC container will be executed.
+   * @throws Exception
+   */
+  @Test
+  public void testKillOpportunisticForGuaranteedContainer() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    // Every container asks for the node's full 2048 MB.
+    List<StartContainerRequest> list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(2048, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(2048, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(2048, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.GUARANTEED)));
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    BaseContainerManagerTest.waitForNMContainerState(containerManager,
+        createContainerId(0), ContainerState.DONE, 40);
+    Thread.sleep(5000);
+
+    // Get container statuses. Container 0 should be killed, container 1
+    // should be queued and container 2 should be running.
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 3; i++) {
+      statList.add(createContainerId(i));
+    }
+    GetContainerStatusesRequest statRequest =
+        GetContainerStatusesRequest.newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      if (status.getContainerId().equals(createContainerId(0))) {
+        Assert.assertTrue(status.getDiagnostics().contains(
+            "Container Killed to make room for Guaranteed Container"));
+      } else if (status.getContainerId().equals(createContainerId(1))) {
+        Assert.assertEquals(
+            org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED,
+            status.getState());
+      } else if (status.getContainerId().equals(createContainerId(2))) {
+        Assert.assertEquals(
+            org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
+            status.getState());
+      }
+      System.out.println("\nStatus : [" + status + "]\n");
+    }
+
+    // Make sure the remaining OPPORTUNISTIC container starts its execution.
+    BaseContainerManagerTest.waitForNMContainerState(containerManager,
+        createContainerId(2), ContainerState.DONE, 40);
+    Thread.sleep(5000);
+    statRequest = GetContainerStatusesRequest.newInstance(Arrays.asList(
+        createContainerId(1)));
+    ContainerStatus contStatus1 = containerManager.getContainerStatuses(
+        statRequest).getContainerStatuses().get(0);
+    Assert.assertEquals(
+        org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
+        contStatus1.getState());
+  }
+
+  /**
+   * Submit three OPPORTUNISTIC containers that can run concurrently, and one
+   * GUARANTEED that needs to kill two of the OPPORTUNISTIC for it to run.
+   * @throws Exception
+   */
+  @Test
+  public void testKillMultipleOpportunisticContainers() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    // 3 x 512 MB fit within 2048 MB, so all three run concurrently.
+    List<StartContainerRequest> list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(512, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(512, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(512, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    // The GUARANTEED container needs 1500 MB; only 512 MB is free, so two
+    // OPPORTUNISTIC containers (2 x 512 MB) must be killed to make room.
+    list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(3), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(1500, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.GUARANTEED)));
+
+    allRequests = StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    BaseContainerManagerTest.waitForNMContainerState(containerManager,
+        createContainerId(0), ContainerState.DONE, 40);
+    Thread.sleep(5000);
+
+    // Get container statuses. Exactly two of the three OPPORTUNISTIC
+    // containers should have been killed to make room for the GUARANTEED one.
+    int killedContainers = 0;
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 4; i++) {
+      statList.add(createContainerId(i));
+    }
+    GetContainerStatusesRequest statRequest =
+        GetContainerStatusesRequest.newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      if (status.getDiagnostics().contains(
+          "Container Killed to make room for Guaranteed Container")) {
+        killedContainers++;
+      }
+      System.out.println("\nStatus : [" + status + "]\n");
+    }
+
+    Assert.assertEquals(2, killedContainers);
+  }
+
+  /**
+   * Submit four OPPORTUNISTIC containers that can run concurrently, and then
+   * two GUARANTEED containers that need exactly two of the OPPORTUNISTIC
+   * ones killed before they can run. Make sure only two are killed.
+   * @throws Exception
+   */
+  @Test
+  public void testKillOnlyRequiredOpportunisticContainers() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    List<StartContainerRequest> list = new ArrayList<>();
+    // Fill NM with Opportunistic containers (4 x 512 MB = all of 2048 MB).
+    for (int i = 0; i < 4; i++) {
+      list.add(StartContainerRequest.newInstance(
+          containerLaunchContext,
+          createContainerToken(createContainerId(i), DUMMY_RM_IDENTIFIER,
+              context.getNodeId(),
+              user, BuilderUtils.newResource(512, 1),
+              context.getContainerTokenSecretManager(), null,
+              ExecutionType.OPPORTUNISTIC)));
+    }
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    list = new ArrayList<>();
+    // Now ask for two Guaranteed containers (2 x 512 MB needed).
+    for (int i = 4; i < 6; i++) {
+      list.add(StartContainerRequest.newInstance(
+          containerLaunchContext,
+          createContainerToken(createContainerId(i), DUMMY_RM_IDENTIFIER,
+              context.getNodeId(),
+              user, BuilderUtils.newResource(512, 1),
+              context.getContainerTokenSecretManager(), null,
+              ExecutionType.GUARANTEED)));
+    }
+
+    allRequests = StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    BaseContainerManagerTest.waitForNMContainerState(containerManager,
+        createContainerId(0), ContainerState.DONE, 40);
+    Thread.sleep(5000);
+
+    // Get container statuses. Exactly two of the four OPPORTUNISTIC
+    // containers should have been killed - no more, no fewer.
+    int killedContainers = 0;
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 6; i++) {
+      statList.add(createContainerId(i));
+    }
+    GetContainerStatusesRequest statRequest =
+        GetContainerStatusesRequest.newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      if (status.getDiagnostics().contains(
+          "Container Killed to make room for Guaranteed Container")) {
+        killedContainers++;
+      }
+      System.out.println("\nStatus : [" + status + "]\n");
+    }
+
+    Assert.assertEquals(2, killedContainers);
+  }
+
+  /**
+   * Start running one GUARANTEED container and queue two OPPORTUNISTIC ones.
+   * Try killing one of the two queued containers.
+   * @throws Exception
+   */
+  @Test
+  public void testStopQueuedContainer() throws Exception {
+    containerManager.start();
+
+    ContainerLaunchContext containerLaunchContext =
+        recordFactory.newRecordInstance(ContainerLaunchContext.class);
+
+    // Container 0 takes the whole node (2048 MB); 1 and 2 must queue.
+    List<StartContainerRequest> list = new ArrayList<>();
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(2048, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.GUARANTEED)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(512, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+    list.add(StartContainerRequest.newInstance(
+        containerLaunchContext,
+        createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER,
+            context.getNodeId(),
+            user, BuilderUtils.newResource(512, 1),
+            context.getContainerTokenSecretManager(), null,
+            ExecutionType.OPPORTUNISTIC)));
+
+    StartContainersRequest allRequests =
+        StartContainersRequest.newInstance(list);
+    containerManager.startContainers(allRequests);
+
+    Thread.sleep(2000);
+
+    // Assert there is initially one container running and two queued.
+    int runningContainersNo = 0;
+    int queuedContainersNo = 0;
+    List<ContainerId> statList = new ArrayList<>();
+    for (int i = 0; i < 3; i++) {
+      statList.add(createContainerId(i));
+    }
+    GetContainerStatusesRequest statRequest = GetContainerStatusesRequest
+        .newInstance(statList);
+    List<ContainerStatus> containerStatuses = containerManager
+        .getContainerStatuses(statRequest).getContainerStatuses();
+    for (ContainerStatus status : containerStatuses) {
+      if (status.getState() ==
+          org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) {
+        runningContainersNo++;
+      } else if (status.getState() ==
+          org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) {
+        queuedContainersNo++;
+      }
+      System.out.println("\nStatus : [" + status + "]\n");
+    }
+
+    Assert.assertEquals(1, runningContainersNo);
+    Assert.assertEquals(2, queuedContainersNo);
+
+    // Stop one of the two queued containers.
+    StopContainersRequest stopRequest = StopContainersRequest.
+        newInstance(Arrays.asList(createContainerId(1)));
+    containerManager.stopContainers(stopRequest);
+
+    Thread.sleep(2000);
+
+    // Assert queued container got properly stopped.
+    statList.clear();
+    for (int i = 0; i < 3; i++) {
+      statList.add(createContainerId(i));
+    }
+
+    statRequest = GetContainerStatusesRequest.newInstance(statList);
+    // Poll until each expected state (RUNNING / SCHEDULED / COMPLETE) has
+    // been observed, remembering the latest status seen per state.
+    HashMap<org.apache.hadoop.yarn.api.records.ContainerState, ContainerStatus>
+        map = new HashMap<>();
+    for (int i = 0; i < 10; i++) {
+      containerStatuses = containerManager.getContainerStatuses(statRequest)
+          .getContainerStatuses();
+      for (ContainerStatus status : containerStatuses) {
+        System.out.println("\nStatus : [" + status + "]\n");
+        map.put(status.getState(), status);
+        if (map.containsKey(
+            org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) &&
+            map.containsKey(
+                org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) &&
+            map.containsKey(
+                org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE)) {
+          break;
+        }
+        Thread.sleep(1000);
+      }
+    }
+    Assert.assertEquals(createContainerId(0),
+        map.get(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING)
+            .getContainerId());
+    Assert.assertEquals(createContainerId(1),
+        map.get(org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE)
+            .getContainerId());
+    Assert.assertEquals(createContainerId(2),
+        map.get(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED)
+            .getContainerId());
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java
index 164488d..321d97b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java
@@ -215,4 +215,19 @@ public boolean canRollback() {
public void commitUpgrade() {
}
+
+  // Test stub: mock containers are never marked for kill.
+  @Override
+  public boolean isMarkedToKill() {
+    return false;
+  }
+
+  // Test stub: no-op; mock containers do not actually launch.
+  @Override
+  public void sendLaunchEvent() {
+
+  }
+
+  // Test stub: no-op; kill events are ignored by the mock.
+  @Override
+  public void sendKillEvent(int exitStatus, String description) {
+
+  }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
index 375b4cf..5f11d4a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
@@ -1362,7 +1362,8 @@ private void handleContainerStatus(List containerStatuses) {
}
// Process running containers
- if (remoteContainer.getState() == ContainerState.RUNNING) {
+ if (remoteContainer.getState() == ContainerState.RUNNING ||
+ remoteContainer.getState() == ContainerState.SCHEDULED) {
// Process only GUARANTEED containers in the RM.
if (remoteContainer.getExecutionType() == ExecutionType.GUARANTEED) {
++numRemoteRunningContainers;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
index 67b652b..89ebeb1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
@@ -78,7 +78,8 @@
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing.QueuingContainerManagerImpl;
+
+
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
@@ -723,8 +724,9 @@ protected ContainerManagerImpl createContainerManager(Context context,
ContainerExecutor exec, DeletionService del,
NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager,
LocalDirsHandlerService dirsHandler) {
- if (getConfig().getBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED,
- YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED_DEFAULT)) {
+ if (getConfig().getInt(
+ YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 0)
+ > 0) {
return new CustomQueueingContainerManagerImpl(context, exec, del,
nodeStatusUpdater, metrics, dirsHandler);
} else {
@@ -864,7 +866,7 @@ protected void createAMRMProxyService(Configuration conf) {
}
private class CustomQueueingContainerManagerImpl extends
- QueuingContainerManagerImpl {
+ ContainerManagerImpl {
public CustomQueueingContainerManagerImpl(Context context,
ContainerExecutor exec, DeletionService del, NodeStatusUpdater
@@ -874,25 +876,6 @@ public CustomQueueingContainerManagerImpl(Context context,
}
@Override
- protected ContainersMonitor createContainersMonitor(ContainerExecutor
- exec) {
- return new ContainersMonitorImpl(exec, dispatcher, this.context) {
-
- @Override
- public void increaseContainersAllocation(ProcessTreeInfo pti) { }
-
- @Override
- public void decreaseContainersAllocation(ProcessTreeInfo pti) { }
-
- @Override
- public boolean hasResourcesAvailable(
- ContainersMonitorImpl.ProcessTreeInfo pti) {
- return true;
- }
- };
- }
-
- @Override
protected void createAMRMProxyService(Configuration conf) {
this.amrmProxyEnabled =
conf.getBoolean(YarnConfiguration.AMRM_PROXY_ENABLED,
@@ -910,6 +893,32 @@ protected void createAMRMProxyService(Configuration conf) {
LOG.info("CustomAMRMProxyService is disabled");
}
}
+
+    // Advertise fixed, generous resources (8 GB pmem, 16 GB vmem, 10 vcores)
+    // so MiniYARNCluster tests are independent of the host machine's real
+    // capacity.
+    @Override
+    protected ContainersMonitor createContainersMonitor(ContainerExecutor
+        exec) {
+      return new ContainersMonitorImpl(exec, dispatcher, this.context) {
+        @Override
+        public float getVmemRatio() {
+          return 2.0f;
+        }
+
+        @Override
+        public long getVmemAllocatedForContainers() {
+          return 16 * 1024L * 1024L * 1024L;
+        }
+
+        @Override
+        public long getPmemAllocatedForContainers() {
+          return 8 * 1024L * 1024L * 1024L;
+        }
+
+        @Override
+        public long getVCoresAllocatedForContainers() {
+          return 10;
+        }
+      };
+    }
}
private class ShortCircuitedAMRMProxy extends AMRMProxyService {