diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java
index 323d31d..ca9034b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java
@@ -34,5 +34,8 @@
RUNNING,
/** Completed container */
- COMPLETE
+ COMPLETE,
+
+ /** Container queued at the NodeManager (NM) */
+ QUEUED
}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java
index 5f52f85..69b84bb 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java
@@ -43,6 +43,14 @@ public static ResourceUtilization newInstance(int pmem, int vmem, float cpu) {
utilization.setCPU(cpu);
return utilization;
}
+
+ @Public
+ @Unstable
+ public static ResourceUtilization newInstance(
+ ResourceUtilization resourceUtil) {
+ return newInstance(resourceUtil.getPhysicalMemory(),
+ resourceUtil.getVirtualMemory(), resourceUtil.getCPU());
+ }
/**
* Get used virtual memory.
@@ -147,4 +155,18 @@ public void addTo(int pmem, int vmem, float cpu) {
this.setVirtualMemory(this.getVirtualMemory() + vmem);
this.setCPU(this.getCPU() + cpu);
}
+
+ /**
+ * Subtract the given amounts from this utilization, updating it in place.
+ * @param pmem Physical memory to be subtracted.
+ * @param vmem Virtual memory to be subtracted.
+ * @param cpu CPU utilization to be subtracted.
+ */
+ @Public
+ @Unstable
+ public void subtractFrom(int pmem, int vmem, float cpu) {
+ this.setPhysicalMemory(this.getPhysicalMemory() - pmem);
+ this.setVirtualMemory(this.getVirtualMemory() - vmem);
+ this.setCPU(this.getCPU() - cpu);
+ }
}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java
index a7f0ece..be92a21 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java
@@ -34,12 +34,13 @@
public static QueuedContainersStatus newInstance() {
return Records.newRecord(QueuedContainersStatus.class);
}
+
+ public abstract int getQueueLength();
+
+ public abstract void setQueueLength(int queueLength);
public abstract int getEstimatedQueueWaitTime();
public abstract void setEstimatedQueueWaitTime(int queueWaitTime);
-
- public abstract int getWaitQueueLength();
-
- public abstract void setWaitQueueLength(int queueWaitTime);
+
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java
index 54470f4..20e9ffd 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java
@@ -66,15 +66,15 @@ public void setEstimatedQueueWaitTime(int queueWaitTime) {
}
@Override
- public int getWaitQueueLength() {
+ public int getQueueLength() {
YarnServerCommonProtos.QueuedContainersStatusProtoOrBuilder p =
viaProto ? proto : builder;
- return p.getWaitQueueLength();
+ return p.getQueueLength();
}
@Override
- public void setWaitQueueLength(int waitQueueLength) {
+ public void setQueueLength(int queueLength) {
maybeInitBuilder();
- builder.setWaitQueueLength(waitQueueLength);
+ builder.setQueueLength(queueLength);
}
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
index 4fdd43c..ed1f87e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
@@ -45,6 +45,7 @@
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
@@ -215,6 +216,13 @@ public static NodeReport newNodeReport(NodeId nodeId, NodeState nodeState,
public static ContainerStatus newContainerStatus(ContainerId containerId,
ContainerState containerState, String diagnostics, int exitStatus,
Resource capability) {
+ return newContainerStatus(containerId, containerState, diagnostics,
+ exitStatus, capability, ExecutionType.GUARANTEED);
+ }
+
+ public static ContainerStatus newContainerStatus(ContainerId containerId,
+ ContainerState containerState, String diagnostics, int exitStatus,
+ Resource capability, ExecutionType executionType) {
ContainerStatus containerStatus = recordFactory
.newRecordInstance(ContainerStatus.class);
containerStatus.setState(containerState);
@@ -222,6 +230,7 @@ public static ContainerStatus newContainerStatus(ContainerId containerId,
containerStatus.setDiagnostics(diagnostics);
containerStatus.setExitStatus(exitStatus);
containerStatus.setCapability(capability);
+ containerStatus.setExecutionType(executionType);
return containerStatus;
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto
index c23d557..0541454 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto
@@ -43,8 +43,8 @@ message NodeStatusProto {
}
message QueuedContainersStatusProto {
optional int32 estimated_queue_wait_time = 1;
- optional int32 wait_queue_length = 2;
+ optional int32 queue_length = 2;
}
message MasterKeyProto {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java
index 27bdfff..f5cd1e2 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java
@@ -147,7 +147,7 @@ public void testNodeHeartBeatRequest() throws IOException {
QueuedContainersStatus queuedContainersStatus = Records.newRecord
(QueuedContainersStatus.class);
queuedContainersStatus.setEstimatedQueueWaitTime(123);
- queuedContainersStatus.setWaitQueueLength(321);
+ queuedContainersStatus.setQueueLength(321);
nodeStatus.setQueuedContainersStatus(queuedContainersStatus);
record.setNodeStatus(nodeStatus);
@@ -159,6 +159,6 @@ public void testNodeHeartBeatRequest() throws IOException {
pb.getNodeStatus()
.getQueuedContainersStatus().getEstimatedQueueWaitTime());
Assert.assertEquals(321,
- pb.getNodeStatus().getQueuedContainersStatus().getWaitQueueLength());
+ pb.getNodeStatus().getQueuedContainersStatus().getQueueLength());
}
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index e0a4da4..c1d6124 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -27,6 +27,7 @@
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
@@ -43,6 +44,12 @@
*/
public interface Context {
+ public interface QueuingContext {
+ Map getQueuedContainers();
+
+ Map getKilledQueuedContainers();
+ }
+
/**
* Return the nodeId. Usable only when the ContainerManager is started.
*
@@ -65,7 +72,7 @@
ConcurrentMap
getIncreasedContainers();
-
+
NMContainerTokenSecretManager getContainerTokenSecretManager();
NMTokenSecretManagerInNM getNMTokenSecretManager();
@@ -92,4 +99,6 @@
boolean isDistributedSchedulingEnabled();
OpportunisticContainerAllocator getContainerAllocator();
+
+ QueuingContext getQueuingContext();
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index ef7b760..7c58cda 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -57,11 +57,13 @@
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing.QueuingContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
@@ -171,8 +173,13 @@ protected ContainerManagerImpl createContainerManager(Context context,
ContainerExecutor exec, DeletionService del,
NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager,
LocalDirsHandlerService dirsHandler) {
- return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
- metrics, dirsHandler);
+ if (context.isDistributedSchedulingEnabled()) { // use supplied context
+ return new QueuingContainerManagerImpl(context, exec, del,
+ nodeStatusUpdater, metrics, dirsHandler);
+ } else {
+ return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
+ metrics, dirsHandler);
+ }
}
protected WebServer createWebServer(Context nmContext,
@@ -468,6 +475,8 @@ public void run() {
private final ConcurrentLinkedQueue
logAggregationReportForApps;
private final boolean isDistSchedulingEnabled;
+
+ private final QueuingContext queuingContext;
private OpportunisticContainerAllocator containerAllocator;
@@ -486,6 +495,11 @@ public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
this.logAggregationReportForApps = new ConcurrentLinkedQueue<
LogAggregationReport>();
this.isDistSchedulingEnabled = isDistSchedulingEnabled;
+ if (this.isDistSchedulingEnabled) {
+ this.queuingContext = new QueuingNMContext();
+ } else {
+ this.queuingContext = null;
+ }
}
/**
@@ -516,7 +530,7 @@ public int getHttpPort() {
getIncreasedContainers() {
return this.increasedContainers;
}
-
+
@Override
public NMContainerTokenSecretManager getContainerTokenSecretManager() {
return this.containerTokenSecretManager;
@@ -613,8 +627,31 @@ public void setQueueableContainerAllocator(
public OpportunisticContainerAllocator getContainerAllocator() {
return containerAllocator;
}
+
+ @Override
+ public QueuingContext getQueuingContext() {
+ return this.queuingContext;
+ }
}
+ public static class QueuingNMContext implements Context.QueuingContext {
+ protected final ConcurrentMap queuedContainers =
+ new ConcurrentSkipListMap<>();
+
+ protected final ConcurrentMap killedQueuedContainers =
+ new ConcurrentSkipListMap<>();
+
+ @Override
+ public ConcurrentMap getQueuedContainers() {
+ return this.queuedContainers;
+ }
+
+ @Override
+ public ConcurrentMap getKilledQueuedContainers() {
+ return this.killedQueuedContainers;
+ }
+ }
+
/**
* @return the node health checker
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index 7d51477..142b88a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -160,11 +160,11 @@
private static final Log LOG = LogFactory.getLog(ContainerManagerImpl.class);
- static final String INVALID_NMTOKEN_MSG = "Invalid NMToken";
+ public static final String INVALID_NMTOKEN_MSG = "Invalid NMToken";
static final String INVALID_CONTAINERTOKEN_MSG =
"Invalid ContainerToken";
- final Context context;
+ protected final Context context;
private final ContainersMonitor containersMonitor;
private Server server;
private final ResourceLocalizationService rsrcLocalizationSrvc;
@@ -172,7 +172,7 @@
private final AuxServices auxiliaryServices;
private final NodeManagerMetrics metrics;
- private final NodeStatusUpdater nodeStatusUpdater;
+ protected final NodeStatusUpdater nodeStatusUpdater;
protected LocalDirsHandlerService dirsHandler;
protected final AsyncDispatcher dispatcher;
@@ -213,8 +213,7 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec,
auxiliaryServices.registerServiceListener(this);
addService(auxiliaryServices);
- this.containersMonitor =
- new ContainersMonitorImpl(exec, dispatcher, this.context);
+ this.containersMonitor = createContainersMonitor(exec);
addService(this.containersMonitor);
dispatcher.register(ContainerEventType.class,
@@ -233,8 +232,13 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec,
this.writeLock = lock.writeLock();
}
+ protected ContainersMonitor createContainersMonitor(ContainerExecutor exec) {
+ return new ContainersMonitorImpl(exec, dispatcher, this.context);
+ }
+
@Override
public void serviceInit(Configuration conf) throws Exception {
+
LogHandler logHandler =
createLogHandler(conf, this.context, this.deletionService);
addIfService(logHandler);
@@ -815,8 +819,8 @@ public StartContainersResponse startContainers(
this.amrmProxyService.processApplicationStartRequest(request);
}
- startContainerInternal(nmTokenIdentifier, containerTokenIdentifier,
- request);
+ performContainerStart(nmTokenIdentifier, request, containerId,
+ containerTokenIdentifier);
succeededContainers.add(containerId);
} catch (YarnException e) {
failedContainers.put(containerId, SerializedException.newInstance(e));
@@ -834,6 +838,14 @@ public StartContainersResponse startContainers(
}
}
+ protected void performContainerStart(NMTokenIdentifier nmTokenIdentifier,
+ StartContainerRequest request, ContainerId containerId,
+ ContainerTokenIdentifier containerTokenIdentifier)
+ throws YarnException, IOException {
+ startContainerInternal(nmTokenIdentifier, containerTokenIdentifier,
+ request);
+ }
+
private ContainerManagerApplicationProto buildAppProto(ApplicationId appId,
String user, Credentials credentials,
Map appAcls,
@@ -876,7 +888,7 @@ private ContainerManagerApplicationProto buildAppProto(ApplicationId appId,
}
@SuppressWarnings("unchecked")
- private void startContainerInternal(NMTokenIdentifier nmTokenIdentifier,
+ protected void startContainerInternal(NMTokenIdentifier nmTokenIdentifier,
ContainerTokenIdentifier containerTokenIdentifier,
StartContainerRequest request) throws YarnException, IOException {
@@ -1058,6 +1070,8 @@ private void changeContainerResourceInternal(
Container container = context.getContainers().get(containerId);
// Check container existence
if (container == null) {
+ // TODO(konstantinos): If a resource change can be requested for a
+ // container that is still queued, that case must be handled here.
if (nodeStatusUpdater.isContainerRecentlyStopped(containerId)) {
throw RPCUtil.getRemoteException("Container " + containerId.toString()
+ " was recently stopped on node manager.");
@@ -1170,7 +1184,7 @@ public StopContainersResponse stopContainers(StopContainersRequest requests)
}
@SuppressWarnings("unchecked")
- private void stopContainerInternal(NMTokenIdentifier nmTokenIdentifier,
+ protected void stopContainerInternal(NMTokenIdentifier nmTokenIdentifier,
ContainerId containerID) throws YarnException, IOException {
String containerIDStr = containerID.toString();
Container container = this.context.getContainers().get(containerID);
@@ -1227,7 +1241,7 @@ public GetContainerStatusesResponse getContainerStatuses(
failedRequests);
}
- private ContainerStatus getContainerStatusInternal(ContainerId containerID,
+ protected ContainerStatus getContainerStatusInternal(ContainerId containerID,
NMTokenIdentifier nmTokenIdentifier) throws YarnException {
String containerIDStr = containerID.toString();
Container container = this.context.getContainers().get(containerID);
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index fb1728a..51dd3b5 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -432,7 +432,8 @@ public ContainerStatus cloneAndGetContainerStatus() {
this.readLock.lock();
try {
return BuilderUtils.newContainerStatus(this.containerId,
- getCurrentState(), diagnostics.toString(), exitCode, getResource());
+ getCurrentState(), diagnostics.toString(), exitCode, getResource(),
+ this.containerTokenIdentifier.getExecutionType());
} finally {
this.readLock.unlock();
}
@@ -486,7 +487,8 @@ private void sendFinishedEvents() {
EventHandler eventHandler = dispatcher.getEventHandler();
eventHandler.handle(new ApplicationContainerFinishedEvent(containerId));
// Remove the container from the resource-monitor
- eventHandler.handle(new ContainerStopMonitoringEvent(containerId));
+ eventHandler.handle(new ContainerStopMonitoringEvent(containerId,
+ containerTokenIdentifier.getExecutionType()));
// Tell the logService too
eventHandler.handle(new LogHandlerContainerFinishedEvent(
containerId, exitCode));
@@ -520,10 +522,10 @@ private void sendContainerMonitorStartEvent() {
int cpuVcores = getResource().getVirtualCores();
long localizationDuration = containerLaunchStartTime -
containerLocalizationStartTime;
- dispatcher.getEventHandler().handle(
- new ContainerStartMonitoringEvent(containerId,
- vmemBytes, pmemBytes, cpuVcores, launchDuration,
- localizationDuration));
+ dispatcher.getEventHandler()
+ .handle(new ContainerStartMonitoringEvent(containerId,
+ containerTokenIdentifier.getExecutionType(), vmemBytes, pmemBytes,
+ cpuVcores, launchDuration, localizationDuration));
}
private void addDiagnostics(String... diags) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java
index c09bebf..27de052 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStartMonitoringEvent.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
public class ContainerStartMonitoringEvent extends ContainersMonitorEvent {
@@ -28,10 +29,10 @@
private final long launchDuration;
private final long localizationDuration;
- public ContainerStartMonitoringEvent(ContainerId containerId,
+ public ContainerStartMonitoringEvent(ContainerId containerId, ExecutionType executionType,
long vmemLimit, long pmemLimit, int cpuVcores, long launchDuration,
long localizationDuration) {
- super(containerId, ContainersMonitorEventType.START_MONITORING_CONTAINER);
+ super(containerId, executionType, ContainersMonitorEventType.START_MONITORING_CONTAINER);
this.vmemLimit = vmemLimit;
this.pmemLimit = pmemLimit;
this.cpuVcores = cpuVcores;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStopMonitoringEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStopMonitoringEvent.java
index 240c5c0..d883281 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStopMonitoringEvent.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerStopMonitoringEvent.java
@@ -19,11 +19,14 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
public class ContainerStopMonitoringEvent extends ContainersMonitorEvent {
- public ContainerStopMonitoringEvent(ContainerId containerId) {
- super(containerId, ContainersMonitorEventType.STOP_MONITORING_CONTAINER);
+ public ContainerStopMonitoringEvent(ContainerId containerId,
+ ExecutionType executionType) {
+ super(containerId, executionType,
+ ContainersMonitorEventType.STOP_MONITORING_CONTAINER);
}
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEvent.java
index 56e578b..f3aa721 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEvent.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEvent.java
@@ -19,21 +19,33 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.event.AbstractEvent;
public class ContainersMonitorEvent extends
AbstractEvent {
private final ContainerId containerId;
+ private final ExecutionType executionType;
public ContainersMonitorEvent(ContainerId containerId,
ContainersMonitorEventType eventType) {
+ this(containerId, ExecutionType.GUARANTEED, eventType);
+ }
+
+ public ContainersMonitorEvent(ContainerId containerId,
+ ExecutionType executionType, ContainersMonitorEventType eventType) {
super(eventType);
this.containerId = containerId;
+ this.executionType = executionType;
}
public ContainerId getContainerId() {
return this.containerId;
}
+
+ public ExecutionType getExecutionType() {
+ return this.executionType;
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index e6c3642..b61accc 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -63,10 +63,10 @@
new ConcurrentHashMap<>();
private final ContainerExecutor containerExecutor;
- private final Dispatcher eventDispatcher;
- private final Context context;
- private ResourceCalculatorPlugin resourceCalculatorPlugin;
- private Configuration conf;
+ protected final Dispatcher eventDispatcher;
+ protected final Context context;
+ protected ResourceCalculatorPlugin resourceCalculatorPlugin;
+ protected Configuration conf;
private static float vmemRatio;
private Class<? extends ResourceCalculatorProcessTree> processTreeClass;
@@ -77,15 +77,14 @@
private boolean vmemCheckEnabled;
private boolean containersMonitorEnabled;
- private long maxVCoresAllottedForContainers;
+ protected long maxVCoresAllottedForContainers;
private static final long UNKNOWN_MEMORY_LIMIT = -1L;
- private int nodeCpuPercentageForYARN;
+ protected int nodeCpuPercentageForYARN;
private ResourceUtilization containersUtilization;
private volatile boolean stopped = false;
- private QueuedContainersStatus queuedContainersStatus;
public ContainersMonitorImpl(ContainerExecutor exec,
AsyncDispatcher dispatcher, Context context) {
@@ -98,7 +97,6 @@ public ContainersMonitorImpl(ContainerExecutor exec,
this.monitoringThread = new MonitoringThread();
this.containersUtilization = ResourceUtilization.newInstance(0, 0, 0.0f);
- this.queuedContainersStatus = QueuedContainersStatus.newInstance();
}
@Override
@@ -194,7 +192,7 @@ protected void serviceInit(Configuration conf) throws Exception {
super.serviceInit(conf);
}
- private boolean isEnabled() {
+ protected boolean isEnabled() {
if (resourceCalculatorPlugin == null) {
LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
+ this.getClass().getName() + " is disabled.");
@@ -237,7 +235,7 @@ protected void serviceStop() throws Exception {
}
@VisibleForTesting
- static class ProcessTreeInfo {
+ protected static class ProcessTreeInfo {
private ContainerId containerId;
private String pid;
private ResourceCalculatorProcessTree pTree;
@@ -701,12 +699,7 @@ public void setContainersUtilization(ResourceUtilization utilization) {
}
public QueuedContainersStatus getQueuedContainersStatus() {
- return this.queuedContainersStatus;
- }
-
- public void setQueuedContainersStatus(QueuedContainersStatus
- queuedContainersStatus) {
- this.queuedContainersStatus = queuedContainersStatus;
+ return null; // no queue status is tracked here; callers get null
}
@Override
@@ -726,40 +719,57 @@ public void handle(ContainersMonitorEvent monitoringEvent) {
switch (monitoringEvent.getType()) {
case START_MONITORING_CONTAINER:
- ContainerStartMonitoringEvent startEvent =
- (ContainerStartMonitoringEvent) monitoringEvent;
- LOG.info("Starting resource-monitoring for " + containerId);
- updateContainerMetrics(monitoringEvent);
- trackingContainers.put(containerId,
- new ProcessTreeInfo(containerId, null, null,
- startEvent.getVmemLimit(), startEvent.getPmemLimit(),
- startEvent.getCpuVcores()));
+ onStartMonitoringContainer(monitoringEvent, containerId);
break;
case STOP_MONITORING_CONTAINER:
- LOG.info("Stopping resource-monitoring for " + containerId);
- updateContainerMetrics(monitoringEvent);
- trackingContainers.remove(containerId);
+ onStopMonitoringContainer(monitoringEvent, containerId);
break;
case CHANGE_MONITORING_CONTAINER_RESOURCE:
- ChangeMonitoringContainerResourceEvent changeEvent =
- (ChangeMonitoringContainerResourceEvent) monitoringEvent;
- ProcessTreeInfo processTreeInfo = trackingContainers.get(containerId);
- if (processTreeInfo == null) {
- LOG.warn("Failed to track container "
- + containerId.toString()
- + ". It may have already completed.");
- break;
- }
- LOG.info("Changing resource-monitoring for " + containerId);
- updateContainerMetrics(monitoringEvent);
- long pmemLimit = changeEvent.getResource().getMemory() * 1024L * 1024L;
- long vmemLimit = (long) (pmemLimit * vmemRatio);
- int cpuVcores = changeEvent.getResource().getVirtualCores();
- processTreeInfo.setResourceLimit(pmemLimit, vmemLimit, cpuVcores);
- changeContainerResource(containerId, changeEvent.getResource());
+ onChangeMonitoringContainerResource(monitoringEvent, containerId);
break;
default:
// TODO: Wrong event.
}
}
+
+ /**
+  * Handles a CHANGE_MONITORING_CONTAINER_RESOURCE event by applying the new
+  * resource limits to a container that is currently tracked. When the
+  * container is not tracked, a warning is logged and nothing else happens.
+  *
+  * @param monitoringEvent the event; it is cast to
+  *          ChangeMonitoringContainerResourceEvent
+  * @param containerId id of the container whose limits are changing
+  */
+ protected void onChangeMonitoringContainerResource(
+     ContainersMonitorEvent monitoringEvent, ContainerId containerId) {
+   ChangeMonitoringContainerResourceEvent resizeEvent =
+       (ChangeMonitoringContainerResourceEvent) monitoringEvent;
+   ProcessTreeInfo tracked = trackingContainers.get(containerId);
+   // TODO: Update available resources.
+   if (tracked != null) {
+     LOG.info("Changing resource-monitoring for " + containerId);
+     updateContainerMetrics(monitoringEvent);
+     // Physical limit is the requested memory scaled by 1024 * 1024
+     // (presumably MB to bytes); the virtual limit is derived via vmemRatio.
+     long newPmemLimit =
+         resizeEvent.getResource().getMemory() * 1024L * 1024L;
+     long newVmemLimit = (long) (newPmemLimit * vmemRatio);
+     int newCpuVcores = resizeEvent.getResource().getVirtualCores();
+     tracked.setResourceLimit(newPmemLimit, newVmemLimit, newCpuVcores);
+     changeContainerResource(containerId, resizeEvent.getResource());
+   } else {
+     LOG.warn("Failed to track container "
+         + containerId.toString()
+         + ". It may have already completed.");
+   }
+ }
+
+ protected void onStopMonitoringContainer(ContainersMonitorEvent monitoringEvent,
+ ContainerId containerId) {
+ LOG.info("Stopping resource-monitoring for " + containerId);
+ updateContainerMetrics(monitoringEvent);
+ trackingContainers.remove(containerId);
+ }
+
+ /**
+  * Handles a START_MONITORING_CONTAINER event: logs it, updates the container
+  * metrics and begins tracking the container with the limits carried by the
+  * event.
+  *
+  * @param monitoringEvent the event; it is cast to
+  *          ContainerStartMonitoringEvent
+  * @param containerId id of the container to start tracking
+  */
+ protected void onStartMonitoringContainer(
+     ContainersMonitorEvent monitoringEvent, ContainerId containerId) {
+   ContainerStartMonitoringEvent launchEvent =
+       (ContainerStartMonitoringEvent) monitoringEvent;
+   LOG.info("Starting resource-monitoring for " + containerId);
+   updateContainerMetrics(monitoringEvent);
+   // The two null arguments are passed through exactly as before; the
+   // limits come straight from the event.
+   ProcessTreeInfo newlyTracked = new ProcessTreeInfo(containerId, null, null,
+       launchEvent.getVmemLimit(), launchEvent.getPmemLimit(),
+       launchEvent.getCpuVcores());
+   trackingContainers.put(containerId, newlyTracked);
+ }
+
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerExecutionEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerExecutionEvent.java
new file mode 100644
index 0000000..6e675b7
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerExecutionEvent.java
@@ -0,0 +1,70 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
+
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.event.AbstractEvent;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.security.NMTokenIdentifier;
+
+public class ContainerExecutionEvent
+ extends AbstractEvent {
+
+ private final StartContainerRequest startRequest;
+ private final ContainerTokenIdentifier containerTokenIdentifier;
+ private final ContainerId containerId;
+ private final NMTokenIdentifier nmTokenIdentifier;
+
+ public ContainerExecutionEvent(StartContainerRequest startRequest,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ NMTokenIdentifier nmTokenIdentifier,
+ ContainerExecutionEventType eventType) {
+ super(eventType);
+ this.startRequest = startRequest;
+ this.containerTokenIdentifier = containerTokenIdentifier;
+ this.containerId = containerTokenIdentifier.getContainerID();
+ this.nmTokenIdentifier = nmTokenIdentifier;
+ }
+
+ public ContainerExecutionEvent(ContainerId containerId,
+ ContainerExecutionEventType eventType) {
+ super(eventType);
+ this.startRequest = null;
+ this.containerTokenIdentifier = null;
+ this.containerId = containerId;
+ this.nmTokenIdentifier = null;
+ }
+
+ public StartContainerRequest getStartRequest() {
+ return this.startRequest;
+ }
+
+ public ContainerTokenIdentifier getContainerTokenIdentifier() {
+ return this.containerTokenIdentifier;
+ }
+
+ public ContainerId getContainerId() {
+ return this.containerId;
+ }
+
+ public NMTokenIdentifier getNMTokenIdentifier() {
+ return this.nmTokenIdentifier;
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerExecutionEventType.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerExecutionEventType.java
new file mode 100644
index 0000000..efc7830
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerExecutionEventType.java
@@ -0,0 +1,24 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
+
+public enum ContainerExecutionEventType { // Types of ContainerExecutionEvent handled by QueuingContainerManagerImpl.
+ CONTAINER_EXECUTION_START, // sent by the queuing monitor after it allocates resources to a container; the handler calls startContainerInternal
+ CONTAINER_EXECUTION_STOP // sent by the queuing monitor when a container to be removed was not found in its queues; the handler stops it if it is a known container
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerQueuingEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerQueuingEvent.java
new file mode 100644
index 0000000..7df9062
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerQueuingEvent.java
@@ -0,0 +1,69 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
+
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.event.AbstractEvent;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.security.NMTokenIdentifier;
+
+public class ContainerQueuingEvent
+ extends AbstractEvent {
+
+ private final NMTokenIdentifier nmTokenIdentifier;
+ private final ContainerTokenIdentifier containerTokenIdentifier;
+ private final ContainerId containerId;
+ private final ExecutionType executionType;
+ private final StartContainerRequest startRequest;
+
+ public ContainerQueuingEvent(NMTokenIdentifier nmTokenIdentifier,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ ExecutionType executionType, StartContainerRequest startRequest,
+ ContainerQueuingEventType eventType) {
+ super(eventType);
+ this.nmTokenIdentifier = nmTokenIdentifier;
+ this.containerTokenIdentifier = containerTokenIdentifier;
+ this.containerId = containerTokenIdentifier.getContainerID();
+ this.executionType = executionType;
+ this.startRequest = startRequest;
+ }
+
+ public NMTokenIdentifier getNMTokenIdentifier() {
+ return nmTokenIdentifier;
+ }
+
+ public ContainerTokenIdentifier getContainerTokenIdentifier() {
+ return containerTokenIdentifier;
+ }
+
+ public ContainerId getContainerId() {
+ return containerId;
+ }
+
+ public ExecutionType getExecutionType() {
+ return executionType;
+ }
+
+ public StartContainerRequest getStartRequest() {
+ return startRequest;
+ }
+
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerQueuingEventType.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerQueuingEventType.java
new file mode 100644
index 0000000..c40c742
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/ContainerQueuingEventType.java
@@ -0,0 +1,25 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
+
+public enum ContainerQueuingEventType { // Types of ContainerQueuingEvent handled by QueuingContainersMonitorImpl.
+ CONTAINER_REQUEST_ARRIVED, // a start request was received; the monitor starts the container if resources are available, otherwise queues it
+ CONTAINER_FAILED_TO_START, // starting the container threw; the monitor removes it from the allocated containers
+ QUEUED_CONTAINER_REMOVED // a stop request hit a container still listed as queued; the monitor drops it from its queues
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java
new file mode 100644
index 0000000..9db799f
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.ipc.RPCUtil;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.security.NMTokenIdentifier;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
+import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+
+import java.io.IOException;
+
+public class QueuingContainerManagerImpl extends ContainerManagerImpl {
+
+ class ContainerExecutionEventDispatcher
+ implements EventHandler {
+ @Override
+ public void handle(ContainerExecutionEvent containerExecutionEvent) {
+ StartContainerRequest containerReq = containerExecutionEvent
+ .getStartRequest();
+ ContainerId containerId = containerExecutionEvent
+ .getContainerTokenIdentifier().getContainerID();
+
+ switch (containerExecutionEvent.getType()) {
+ case CONTAINER_EXECUTION_START:
+ try {
+ startContainerInternal(containerExecutionEvent.getNMTokenIdentifier(),
+ containerExecutionEvent.getContainerTokenIdentifier(),
+ containerReq);
+ } catch (YarnException | IOException e) {
+ containerFailedToStart(containerId,
+ containerExecutionEvent.getContainerTokenIdentifier());
+ LOG.error("Container failed to start.", e);
+ }
+ break;
+ case CONTAINER_EXECUTION_STOP:
+ try {
+ stopContainerInternalIfNotQueued(containerId);
+ } catch (YarnException | IOException e) {
+ LOG.error("Container did not get removed successfully.", e);
+ }
+ break;
+ default:
+ throw new YarnRuntimeException(
+ "Got an unknown ContainerExecutionEvent type: "
+ + containerExecutionEvent.getType());
+ }
+ }
+ }
+
+ private static final Log LOG = LogFactory
+ .getLog(QueuingContainerManagerImpl.class);
+
+ public QueuingContainerManagerImpl(Context context, ContainerExecutor exec,
+ DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
+ NodeManagerMetrics metrics, LocalDirsHandlerService dirsHandler) {
+ super(context, exec, deletionContext, nodeStatusUpdater, metrics,
+ dirsHandler);
+ }
+
+ @Override
+ protected ContainersMonitor createContainersMonitor(ContainerExecutor exec) {
+ return new QueuingContainersMonitorImpl(exec, dispatcher, this.context);
+ }
+
+ @Override
+ public void serviceInit(Configuration conf) throws Exception {
+ super.serviceInit(conf);
+ dispatcher.register(ContainerExecutionEventType.class,
+ new ContainerExecutionEventDispatcher());
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected void performContainerStart(NMTokenIdentifier nmTokenIdentifier,
+ StartContainerRequest request, ContainerId containerId,
+ ContainerTokenIdentifier containerTokenIdentifier)
+ throws YarnException, IOException {
+ this.context.getQueuingContext().getQueuedContainers().put(containerId,
+ containerTokenIdentifier);
+
+ this.dispatcher.getEventHandler()
+ .handle(new ContainerQueuingEvent(nmTokenIdentifier,
+ containerTokenIdentifier,
+ containerTokenIdentifier.getExecutionType(), request,
+ ContainerQueuingEventType.CONTAINER_REQUEST_ARRIVED));
+ }
+
+ @Override
+ protected void startContainerInternal(NMTokenIdentifier nmTokenIdentifier,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ StartContainerRequest request) throws YarnException, IOException {
+ ContainerId containerId = containerTokenIdentifier.getContainerID();
+ this.context.getQueuingContext().getQueuedContainers().remove(containerId);
+ super.startContainerInternal(nmTokenIdentifier, containerTokenIdentifier,
+ request);
+ }
+
+ @SuppressWarnings("unchecked")
+ private void containerFailedToStart(ContainerId containerId,
+ ContainerTokenIdentifier containerTokenId) {
+ this.context.getQueuingContext().getQueuedContainers().remove(containerId);
+
+ this.dispatcher.getEventHandler()
+ .handle(new ContainerQueuingEvent(null, containerTokenId,
+ containerTokenId.getExecutionType(), null,
+ ContainerQueuingEventType.CONTAINER_FAILED_TO_START));
+
+ this.context.getQueuingContext().getKilledQueuedContainers().put(
+ containerTokenId,
+ "Container removed from queue as it failed to start.");
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected void stopContainerInternal(NMTokenIdentifier nmTokenIdentifier,
+ ContainerId containerID) throws YarnException, IOException {
+ Container container = this.context.getContainers().get(containerID);
+ // If container is null and distributed scheduling is enabled, container
+ // might be queued. Otherwise, container might not be handled by this NM.
+ if (container == null && this.context.getQueuingContext()
+ .getQueuedContainers().containsKey(containerID)) {
+ ContainerTokenIdentifier containerTokenId = this.context
+ .getQueuingContext().getQueuedContainers().remove(containerID);
+
+ this.dispatcher.getEventHandler()
+ .handle(new ContainerQueuingEvent(null, containerTokenId,
+ containerTokenId.getExecutionType(), null,
+ ContainerQueuingEventType.QUEUED_CONTAINER_REMOVED));
+
+ this.context.getQueuingContext().getKilledQueuedContainers().put(
+ containerTokenId,
+ "Queued container request removed by ApplicationMaster.");
+
+ nodeStatusUpdater.sendOutofBandHeartBeat();
+ }
+ super.stopContainerInternal(nmTokenIdentifier, containerID);
+ }
+
+ private void stopContainerInternalIfNotQueued(ContainerId containerID)
+ throws YarnException, IOException {
+ if (this.context.getContainers().containsKey(containerID)) {
+ UserGroupInformation remoteUgi = getRemoteUgi();
+ NMTokenIdentifier identifier = selectNMTokenIdentifier(remoteUgi);
+ if (identifier == null) {
+ throw RPCUtil.getRemoteException(INVALID_NMTOKEN_MSG);
+ }
+ stopContainerInternal(identifier, containerID);
+ }
+ }
+
+ @Override
+ protected ContainerStatus getContainerStatusInternal(ContainerId containerID,
+ NMTokenIdentifier nmTokenIdentifier) throws YarnException {
+ Container container = this.context.getContainers().get(containerID);
+ if (container == null && this.context.getQueuingContext()
+ .getQueuedContainers().containsKey(containerID)) {
+ ExecutionType executionType = this.context.getQueuingContext()
+ .getQueuedContainers().get(containerID).getExecutionType();
+ return BuilderUtils.newContainerStatus(containerID,
+ org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, "",
+ ContainerExitStatus.INVALID, this.context.getQueuingContext()
+ .getQueuedContainers().get(containerID).getResource(),
+ executionType);
+ }
+ return super.getContainerStatusInternal(containerID, nmTokenIdentifier);
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainersMonitorImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainersMonitorImpl.java
new file mode 100644
index 0000000..3b01a41
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainersMonitorImpl.java
@@ -0,0 +1,441 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceUtilization;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class QueuingContainersMonitorImpl extends ContainersMonitorImpl {
+
+ /**
+  * Pairs the queuing event of a container request with the ProcessTreeInfo
+  * that describes the resources it asks for.
+  */
+ class AllocatedContainerInfo {
+   private ContainerQueuingEvent contQueuingEvent;
+   private ProcessTreeInfo pti;
+
+   /**
+    * @param queuingEvent the event that announced the container request
+    * @param processTreeInfo the resource description of the container
+    */
+   AllocatedContainerInfo(ContainerQueuingEvent queuingEvent,
+       ProcessTreeInfo processTreeInfo) {
+     contQueuingEvent = queuingEvent;
+     pti = processTreeInfo;
+   }
+
+   /** @return the event that announced the container request. */
+   ContainerQueuingEvent getContQueuingEvent() {
+     return contQueuingEvent;
+   }
+
+   /** @return the resource description of the container. */
+   protected ProcessTreeInfo getPti() {
+     return pti;
+   }
+ }
+
+ /**
+  * Handles {@link ContainerQueuingEvent}s: starts or queues newly arrived
+  * container requests, drops removed requests from the queues, and releases
+  * the allocation of containers that failed to start. Events are ignored
+  * while the monitor is not enabled.
+  */
+ public class ContainerQueuingEventDispatcher
+     implements EventHandler<ContainerQueuingEvent> {
+   @SuppressWarnings("unchecked")
+   @Override
+   public void handle(ContainerQueuingEvent containerQueuingEvent) {
+     if (!isEnabled()) {
+       return;
+     }
+
+     switch (containerQueuingEvent.getType()) {
+     case CONTAINER_REQUEST_ARRIVED:
+       ProcessTreeInfo pti = createProcessTreeInfo(
+           containerQueuingEvent.getContainerId(),
+           containerQueuingEvent.getContainerTokenIdentifier().getResource());
+
+       AllocatedContainerInfo allocatedContInfo = new AllocatedContainerInfo(
+           containerQueuingEvent, pti);
+
+       // If there are already free resources to start the container.
+       if (hasAllocatedResourcesAvailable(pti)) {
+         startAllocatedContainer(allocatedContInfo);
+       } else {
+         if (containerQueuingEvent
+             .getExecutionType() == ExecutionType.GUARANTEED) {
+           synchronized (queuedGuarRequests) {
+             queuedGuarRequests.add(allocatedContInfo);
+           }
+           // Kill running opportunistic containers to make space for
+           // guaranteed container.
+           killOpportContainers(allocatedContInfo);
+         } else {
+           synchronized (queuedOpportRequests) {
+             queuedOpportRequests.add(allocatedContInfo);
+           }
+         }
+       }
+       break;
+     case QUEUED_CONTAINER_REMOVED:
+       boolean foundInQueue = removeContainerFromQueues(
+           containerQueuingEvent.getContainerId(),
+           containerQueuingEvent.getExecutionType());
+
+       // The container started in the meanwhile, so the ContainerManager needs
+       // to be notified and stop it.
+       if (!foundInQueue) {
+         eventDispatcher.getEventHandler()
+             .handle(new ContainerExecutionEvent(
+                 containerQueuingEvent.getContainerId(),
+                 ContainerExecutionEventType.CONTAINER_EXECUTION_STOP));
+       }
+       break;
+     case CONTAINER_FAILED_TO_START:
+       removeAllocatedContainer(containerQueuingEvent.getContainerId(),
+           containerQueuingEvent.getExecutionType());
+       break;
+     default:
+       // TODO: Wrong event
+     }
+   }
+ }
+
+ // Containers that currently hold an allocation, keyed by container id and
+ // split by execution type.
+ private Map<ContainerId, ProcessTreeInfo> allocatedGuarContainers;
+ private Map<ContainerId, ProcessTreeInfo> allocatedOpportContainers;
+
+ // Sum of the resource limits of all allocated containers.
+ private ResourceUtilization allocatedContainersUtilization;
+
+ // Requests waiting for resources, split by execution type.
+ private Queue<AllocatedContainerInfo> queuedGuarRequests;
+ private Queue<AllocatedContainerInfo> queuedOpportRequests;
+ // Ids removed again when the container stops being monitored.
+ // NOTE(review): presumably the opportunistic containers picked to be
+ // killed - confirm against killOpportContainers.
+ private Set<ContainerId> opportContainersToKill;
+
+ /**
+  * Creates the monitor with no allocated containers, zero allocated
+  * utilization and empty request queues.
+  *
+  * @param exec passed through to the parent monitor
+  * @param dispatcher passed through to the parent monitor
+  * @param context passed through to the parent monitor
+  */
+ public QueuingContainersMonitorImpl(ContainerExecutor exec,
+     AsyncDispatcher dispatcher, Context context) {
+   super(exec, dispatcher, context);
+   this.allocatedGuarContainers = new ConcurrentHashMap<>();
+   this.allocatedOpportContainers = new ConcurrentHashMap<>();
+   this.allocatedContainersUtilization = ResourceUtilization.newInstance(0, 0,
+       0.0f);
+   this.queuedGuarRequests = new LinkedList<>();
+   this.queuedOpportRequests = new LinkedList<>();
+   // Explicit element type: a raw HashSet here produced an unchecked
+   // conversion and hid what the set holds.
+   this.opportContainersToKill = Collections
+       .synchronizedSet(new HashSet<ContainerId>());
+ }
+
+ /**
+  * Initializes the parent monitor, then registers this monitor's handler for
+  * ContainerQueuingEventType events on the event dispatcher.
+  */
+ @Override
+ protected void serviceInit(Configuration conf) throws Exception {
+   super.serviceInit(conf);
+   ContainerQueuingEventDispatcher queuingHandler =
+       new ContainerQueuingEventDispatcher();
+   eventDispatcher.register(ContainerQueuingEventType.class, queuingHandler);
+ }
+
+ /**
+  * Builds the ProcessTreeInfo describing the limits of a container from its
+  * requested resource.
+  *
+  * @param containerId id of the container
+  * @param resource the resource requested for the container
+  * @return a ProcessTreeInfo with the physical memory limit, the virtual
+  *         memory limit derived from the configured vmem-pmem ratio, and the
+  *         requested number of vcores
+  */
+ private ProcessTreeInfo createProcessTreeInfo(ContainerId containerId,
+     Resource resource) {
+   // Widen to long before scaling: "getMemory() * 1024 * 1024L" performed the
+   // first multiplication in int arithmetic and could overflow. This also
+   // matches the conversion used in the parent monitor.
+   long pmemBytes = resource.getMemory() * 1024L * 1024L;
+   float vmemPmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
+       YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
+   long vmemBytes = (long) (vmemPmemRatio * pmemBytes);
+   int cpuVcores = resource.getVirtualCores();
+
+   return new ProcessTreeInfo(containerId, null, null, vmemBytes, pmemBytes,
+       cpuVcores);
+ }
+
+ /**
+  * Converts the vcores of the given container into the CPU figure that is
+  * accumulated in the allocated utilization (compared against 1.0f when
+  * checking for available resources).
+  *
+  * @param pti the container whose vcores are converted
+  * @return the CPU share derived from the container's vcores
+  */
+ private float allocatedCpuUsage(ProcessTreeInfo pti) {
+   float percentOfOneCore = pti.getCpuVcores() * 100.0f;
+   float percentOfAllCores = percentOfOneCore
+       / resourceCalculatorPlugin.getNumProcessors();
+   // Same arithmetic and evaluation order as before, split in two steps.
+   float scaledShare = percentOfAllCores * 1000
+       * maxVCoresAllottedForContainers / nodeCpuPercentageForYARN;
+   return scaledShare / 1000.0f;
+ }
+
+ /**
+  * Adds the limits of the given container to the given utilization. Memory
+  * limits are shifted right by 20 bits before being added.
+  *
+  * @param resourceUtil the utilization to increase
+  * @param pti the container whose limits are added
+  */
+ private void increaseResourceUtil(ResourceUtilization resourceUtil,
+     ProcessTreeInfo pti) {
+   int pmemDelta = (int) (pti.getPmemLimit() >> 20);
+   int vmemDelta = (int) (pti.getVmemLimit() >> 20);
+   float cpuDelta = allocatedCpuUsage(pti);
+   resourceUtil.addTo(pmemDelta, vmemDelta, cpuDelta);
+ }
+
+ /**
+  * Subtracts the limits of the given container from the given utilization.
+  * Memory limits are shifted right by 20 bits before being subtracted.
+  *
+  * @param resourceUtil the utilization to decrease
+  * @param pti the container whose limits are subtracted
+  */
+ private void decreaseResourceUtil(ResourceUtilization resourceUtil,
+     ProcessTreeInfo pti) {
+   int pmemDelta = (int) (pti.getPmemLimit() >> 20);
+   int vmemDelta = (int) (pti.getVmemLimit() >> 20);
+   float cpuDelta = allocatedCpuUsage(pti);
+   resourceUtil.subtractFrom(pmemDelta, vmemDelta, cpuDelta);
+ }
+
+ /**
+  * In addition to the parent behavior, releases the allocation held by the
+  * stopped container, forgets it as a kill candidate, and tries to start
+  * pending containers with the resources that became free.
+  */
+ @Override
+ protected void onStopMonitoringContainer(
+     ContainersMonitorEvent monitoringEvent, ContainerId containerId) {
+   super.onStopMonitoringContainer(monitoringEvent, containerId);
+
+   // Release the finished container's allocation first, so the pending
+   // containers below can use it.
+   ExecutionType finishedType = monitoringEvent.getExecutionType();
+   removeAllocatedContainer(containerId, finishedType);
+   opportContainersToKill.remove(containerId);
+
+   startPendingContainers();
+ }
+
+ /**
+ * @return true if there are available allocated resources for the given
+ * container to start.
+ */
+ private boolean hasAllocatedResourcesAvailable(ProcessTreeInfo pti) {
+ // TODO: Would it be better to copy the allocatedContainersUtiliation locally,
+ // instead of holding the lock for the whole check?
+ synchronized (this.allocatedContainersUtilization) {
+ // Check physical memory.
+ if (this.allocatedContainersUtilization.getPhysicalMemory() + (int) (pti
+ .getPmemLimit() >> 20) > (int) (getPmemAllocatedForContainers() >> 20)) {
+ return false;
+ }
+ // Check virtual memory.
+ if (this.allocatedContainersUtilization.getVirtualMemory() + (int) (pti
+ .getVmemLimit() >> 20) > (int) (getVmemAllocatedForContainers() >> 20)) {
+ return false;
+ }
+ // Check CPU.
+ if (this.allocatedContainersUtilization.getCPU()
+ + allocatedCpuUsage(pti) > 1.0f) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+  * Begins the execution of the given container: records it among the
+  * allocated containers of its execution type, adds its limits to the
+  * allocated utilization, and sends a CONTAINER_EXECUTION_START event so the
+  * ContainerManager performs the actual start.
+  */
+ @SuppressWarnings("unchecked")
+ private void startAllocatedContainer(AllocatedContainerInfo allocatedContInfo) {
+   ProcessTreeInfo containerPti = allocatedContInfo.getPti();
+   ContainerQueuingEvent request = allocatedContInfo.getContQueuingEvent();
+
+   boolean guaranteed =
+       request.getExecutionType() == ExecutionType.GUARANTEED;
+   Map<ContainerId, ProcessTreeInfo> allocated = guaranteed
+       ? allocatedGuarContainers : allocatedOpportContainers;
+   allocated.put(containerPti.getContainerId(), containerPti);
+
+   increaseResourceUtil(this.allocatedContainersUtilization, containerPti);
+
+   // Send event to ContainerManager to start execution of container.
+   ContainerExecutionEvent startEvent = new ContainerExecutionEvent(
+       request.getStartRequest(), request.getContainerTokenIdentifier(),
+       request.getNMTokenIdentifier(),
+       ContainerExecutionEventType.CONTAINER_EXECUTION_START);
+   eventDispatcher.getEventHandler().handle(startEvent);
+ }
+
+ /**
+  * Starts queued containers in queue order for as long as each one fits in
+  * the available resources; stops at the first container that does not fit.
+  *
+  * @param queuedContainers the queue to drain; started containers are
+  *          removed from it
+  * @param resourcesAvailable false to skip the queue entirely
+  * @return false if resourcesAvailable was false or a queued container did
+  *         not fit; true if every queued container was started
+  */
+ private boolean startContainersFromQueue(
+     Queue<AllocatedContainerInfo> queuedContainers,
+     boolean resourcesAvailable) {
+   synchronized (queuedContainers) {
+     // Typed iterator: with a raw Iterator, next() yields Object and cannot
+     // be assigned to AllocatedContainerInfo.
+     Iterator<AllocatedContainerInfo> queueIter = queuedContainers.iterator();
+     while (queueIter.hasNext() && resourcesAvailable) {
+       AllocatedContainerInfo allocatedContInfo = queueIter.next();
+
+       if (hasAllocatedResourcesAvailable(allocatedContInfo.getPti())) {
+         startAllocatedContainer(allocatedContInfo);
+         queueIter.remove();
+       } else {
+         resourcesAvailable = false;
+       }
+     }
+   }
+   return resourcesAvailable;
+ }
+
+ /**
+  * Tries to start as many pending containers as the available resources
+  * allow. Guaranteed requests are served first; opportunistic requests are
+  * only considered when no guaranteed request was left waiting.
+  */
+ private void startPendingContainers() {
+   boolean guaranteedQueueDrained =
+       startContainersFromQueue(queuedGuarRequests, true);
+   if (guaranteedQueueDrained) {
+     // Resources may still be left for opportunistic containers.
+     startContainersFromQueue(queuedOpportRequests, true);
+   }
+ }
+
+ /**
+ * Remove the given container from the allocated containers of its execution
+ * type and, if it was tracked there, decrease the allocated utilization.
+ */
+ private void removeAllocatedContainer(ContainerId containerId,
+ ExecutionType executionType) {
+ ProcessTreeInfo ptiToRemove = null;
+ if (executionType == ExecutionType.GUARANTEED) {
+ ptiToRemove = allocatedGuarContainers.remove(containerId);
+ } else { // any non-GUARANTEED type is treated as opportunistic
+ ptiToRemove = allocatedOpportContainers.remove(containerId);
+ }
+ // If container was indeed running, update allocated resource utilization.
+ if (ptiToRemove != null) {
+ decreaseResourceUtil(this.allocatedContainersUtilization, ptiToRemove);
+ }
+ }
+
+ /**
+ * Remove the given container from the queue matching its execution type
+ * (the guaranteed queue for GUARANTEED, the opportunistic queue otherwise).
+ * @return true if the container was found in, and removed from, that queue.
+ */
+ private boolean removeContainerFromQueues(ContainerId containerId,
+ ExecutionType executionType) {
+ boolean foundInQueue = false;
+ if (executionType == ExecutionType.GUARANTEED) {
+ synchronized (queuedGuarRequests) {
+ Iterator<AllocatedContainerInfo> guarIter = queuedGuarRequests.iterator();
+ while (guarIter.hasNext() && !foundInQueue) {
+ if (guarIter.next().getPti().getContainerId().equals(containerId)) {
+ guarIter.remove();
+ foundInQueue = true;
+ }
+ }
+ }
+ } else {
+ synchronized (queuedOpportRequests) {
+ Iterator<AllocatedContainerInfo> opportIter = queuedOpportRequests
+ .iterator();
+ while (opportIter.hasNext() && !foundInQueue) {
+ if (opportIter.next().getPti().getContainerId().equals(containerId)) {
+ opportIter.remove();
+ foundInQueue = true;
+ }
+ }
+ }
+ }
+
+ return foundInQueue;
+ }
+
+ /**
+ * Calculates the amount of resources that need to be freed up (by killing
+ * opportunistic containers) in order for the given guaranteed container to
+ * start its execution. Resource utilization to be freed up =
+ * allocatedContainersUtilization - utilization of opportContainersToKill +
+ * utilization of pending guaranteed containers that will start before the
+ * given container + utilization of given container - total resources of node.
+ *
+ * @param containerToStartId
+ * the ContainerId of the guaranteed container for which we need to
+ * free resources, so that its execution can start.
+ * @return the resources that need to be freed up for the given guaranteed
+ * container to start.
+ */
+ private ResourceUtilization resourcesToFreeUp(
+ ContainerId containerToStartId) {
+ // Get current utilization of allocated containers.
+ ResourceUtilization resourceUtilToFreeUp = ResourceUtilization
+ .newInstance(allocatedContainersUtilization);
+
+ // Subtract from the utilization the utilization of the opportunistic
+ // containers that are marked for killing.
+ synchronized (opportContainersToKill) {
+ for (ContainerId opportContId : opportContainersToKill) {
+ if (this.allocatedOpportContainers.containsKey(opportContId)) {
+ decreaseResourceUtil(resourceUtilToFreeUp,
+ this.allocatedOpportContainers.get(opportContId));
+ }
+ }
+ }
+ // Add to the utilization the utilization of the pending guaranteed
+ // containers that will start before the current container, up to and
+ // including the current container itself.
+ synchronized (queuedGuarRequests) {
+ for (AllocatedContainerInfo guarContInfo : queuedGuarRequests) {
+ increaseResourceUtil(resourceUtilToFreeUp, guarContInfo.getPti());
+ if (guarContInfo.getContQueuingEvent().getContainerId()
+ .equals(containerToStartId)) {
+ break;
+ }
+ }
+ }
+ // Subtract the node's resources; >> 20 presumably converts bytes to MB.
+ resourceUtilToFreeUp.subtractFrom(
+ (int) (getPmemAllocatedForContainers() >> 20),
+ (int) (getVmemAllocatedForContainers() >> 20), 1.0f);
+
+ return resourceUtilToFreeUp;
+ }
+
+ /**
+ * Kill opportunistic containers to free up resources for running the given
+ * container.
+ *
+ * @param allocatedContInfo
+ * the container whose execution needs to start by freeing up
+ * resources occupied by opportunistic containers.
+ */
+ @SuppressWarnings("unchecked")
+ private void killOpportContainers(AllocatedContainerInfo allocatedContInfo) {
+ ContainerId containerToStartId = allocatedContInfo.getContQueuingEvent()
+ .getContainerId();
+ // Track resources that need to be freed.
+ ResourceUtilization resourcesToFreeUp = resourcesToFreeUp(
+ containerToStartId);
+
+ // Go over the running opportunistic containers. Avoid containers that have
+ // already been marked for killing.
+ for (Map.Entry<ContainerId, ProcessTreeInfo> runningOpportCont :
+ allocatedOpportContainers.entrySet()) {
+ ContainerId runningOpportContId = runningOpportCont.getKey();
+
+ // If there are sufficient resources to execute the given container, do
+ // not kill more opportunistic containers.
+ if (resourcesToFreeUp.getPhysicalMemory() <= 0
+ && resourcesToFreeUp.getVirtualMemory() <= 0
+ && resourcesToFreeUp.getCPU() <= 0.f) {
+ break;
+ }
+
+ if (!opportContainersToKill.contains(runningOpportContId)) {
+ opportContainersToKill.add(runningOpportContId);
+ decreaseResourceUtil(resourcesToFreeUp, runningOpportCont.getValue());
+ // Send event to ContainerManager for actually killing the container.
+ eventDispatcher.getEventHandler()
+ .handle(new ContainerExecutionEvent(runningOpportContId,
+ ContainerExecutionEventType.CONTAINER_EXECUTION_STOP));
+ }
+ }
+ }
+
+ @Override
+ public QueuedContainersStatus getQueuedContainersStatus() {
+ QueuedContainersStatus qContStatus = QueuedContainersStatus.newInstance();
+ synchronized(queuedOpportRequests) { // opportunistic queue only
+ qContStatus.setQueueLength(queuedOpportRequests.size());
+ }
+ qContStatus.setEstimatedQueueWaitTime(-1); // wait time is not estimated
+ return qContStatus;
+ }
+
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
index e1ffd88..7bf5aa6 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
@@ -619,7 +619,7 @@ public int getHttpPort() {
public ConcurrentMap getContainers() {
return null;
}
-
+
@Override
public ConcurrentMap getIncreasedContainers() {
return null;
@@ -688,5 +688,10 @@ public boolean isDistributedSchedulingEnabled() {
public OpportunisticContainerAllocator getContainerAllocator() {
return null;
}
+
+ @Override
+ public QueuingContext getQueuingContext() {
+ return null; // stub: returns null like the neighbouring accessors
+ }
}
}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
index d7f89fc..c4247ac 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
@@ -27,6 +27,7 @@
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
@@ -153,7 +154,7 @@ public void testContainersResourceChange() throws Exception {
containersMonitor.start();
// create container 1
containersMonitor.handle(new ContainerStartMonitoringEvent(
- getContainerId(1), 2100L, 1000L, 1, 0, 0));
+ getContainerId(1), ExecutionType.GUARANTEED, 2100L, 1000L, 1, 0, 0));
// verify that this container is properly tracked
assertNotNull(getProcessTreeInfo(getContainerId(1)));
assertEquals(1000L, getProcessTreeInfo(getContainerId(1))
@@ -173,8 +174,9 @@ public void testContainersResourceChange() throws Exception {
assertTrue(containerEventHandler
.isContainerKilled(getContainerId(1)));
// create container 2
- containersMonitor.handle(new ContainerStartMonitoringEvent(
- getContainerId(2), 2202009L, 1048576L, 1, 0, 0));
+ containersMonitor
+ .handle(new ContainerStartMonitoringEvent(getContainerId(2),
+ ExecutionType.GUARANTEED, 2202009L, 1048576L, 1, 0, 0));
// verify that this container is properly tracked
assertNotNull(getProcessTreeInfo(getContainerId(2)));
assertEquals(1048576L, getProcessTreeInfo(getContainerId(2))
@@ -215,8 +217,9 @@ public void testContainersResourceChangeIsTriggeredImmediately()
// now waiting for the next monitor cycle
Thread.sleep(1000);
// create a container with id 3
- containersMonitor.handle(new ContainerStartMonitoringEvent(
- getContainerId(3), 2202009L, 1048576L, 1, 0, 0));
+ containersMonitor
+ .handle(new ContainerStartMonitoringEvent(getContainerId(3),
+ ExecutionType.GUARANTEED, 2202009L, 1048576L, 1, 0, 0));
// Verify that this container has been tracked
assertNotNull(getProcessTreeInfo(getContainerId(3)));
// trigger a change resource event, check limit after change
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TopKNodeSelector.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TopKNodeSelector.java
index 5aedbed..00d34a6 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TopKNodeSelector.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TopKNodeSelector.java
@@ -156,7 +156,7 @@ public void nodeUpdate(RMNode rmNode) {
rmNode.getQueuedContainersStatus();
int estimatedQueueWaitTime =
queuedContainersStatus.getEstimatedQueueWaitTime();
- int waitQueueLength = queuedContainersStatus.getWaitQueueLength();
+ int waitQueueLength = queuedContainersStatus.getQueueLength();
// Add nodes to clusterNodes.. if estimatedQueueTime is -1, Ignore node
// UNLESS comparator is based on queue length, in which case, we should add
synchronized (this.clusterNodes) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestTopKNodeSelector.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestTopKNodeSelector.java
index a21ae19..0d5ff98 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestTopKNodeSelector.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestTopKNodeSelector.java
@@ -193,7 +193,7 @@ private RMNode createRMNode(String host, int port,
Mockito.mock(QueuedContainersStatus.class);
Mockito.when(status1.getEstimatedQueueWaitTime())
.thenReturn(waitTime);
- Mockito.when(status1.getWaitQueueLength())
+ Mockito.when(status1.getQueueLength())
.thenReturn(queueLength);
Mockito.when(node1.getQueuedContainersStatus()).thenReturn(status1);
return node1;