diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index 9152b1b8..2d1591b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -230,9 +230,11 @@ protected void serviceInit(Configuration conf) throws Exception { // Init startTime to current time. If all goes well, it will be reset after // first attempt to contact RM. retrystartTime = System.currentTimeMillis(); - this.scheduledRequests.setNumOpportunisticMapsPer100( - conf.getInt(MRJobConfig.MR_NUM_OPPORTUNISTIC_MAPS_PER_100, - MRJobConfig.DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PER_100)); + this.scheduledRequests.setNumOpportunisticMapsPercent( + conf.getInt(MRJobConfig.MR_NUM_OPPORTUNISTIC_MAPS_PERCENT, + MRJobConfig.DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PERCENT)); + LOG.info(this.scheduledRequests.getNumOpportunisticMapsPercent() + + "% of the mappers will be scheduled using OPPORTUNISTIC containers"); } @Override @@ -1024,10 +1026,14 @@ public Resource getResourceLimit() { final Map maps = new LinkedHashMap(); int mapsMod100 = 0; - int numOpportunisticMapsPer100 = 0; + int numOpportunisticMapsPercent = 0; - void setNumOpportunisticMapsPer100(int numMaps) { - this.numOpportunisticMapsPer100 = numMaps; + void setNumOpportunisticMapsPercent(int numMaps) { + this.numOpportunisticMapsPercent = numMaps; + } + + int getNumOpportunisticMapsPercent() { + return this.numOpportunisticMapsPercent; } @VisibleForTesting @@ -1074,7 +1080,7 @@ void addMap(ContainerRequestEvent event) { maps.put(event.getAttemptID(), request); addContainerReq(request); } else { - if (mapsMod100 < numOpportunisticMapsPer100) { + if (mapsMod100 < numOpportunisticMapsPercent) { request = new ContainerRequest(event, PRIORITY_OPPORTUNISTIC_MAP, mapNodeLabelExpression); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index e046c66..015c1f2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -988,9 +988,9 @@ * requested by the AM will be opportunistic. If the total number of maps * for the job is less than 'x', then ALL maps will be OPPORTUNISTIC */ - public static final String MR_NUM_OPPORTUNISTIC_MAPS_PER_100 = - "mapreduce.job.num-opportunistic-maps-per-100"; - public static final int DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PER_100 = 0; + public static final String MR_NUM_OPPORTUNISTIC_MAPS_PERCENT = + "mapreduce.job.num-opportunistic-maps-percent"; + public static final int DEFAULT_MR_NUM_OPPORTUNISTIC_MAPS_PERCENT = 0; /** * A comma-separated list of properties whose value will be redacted. 
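A minimal driver-side sketch of the renamed knob, assuming an otherwise standard MapReduce job setup; only MR_NUM_OPPORTUNISTIC_MAPS_PERCENT and its mapreduce.job.num-opportunistic-maps-percent property come from this patch, while the driver class and job name below are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class OpportunisticMapsDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Ask for 40% of this job's map tasks to be scheduled in OPPORTUNISTIC
    // containers; equivalent to
    // -Dmapreduce.job.num-opportunistic-maps-percent=40 on the command line.
    conf.setInt(MRJobConfig.MR_NUM_OPPORTUNISTIC_MAPS_PERCENT, 40);
    Job job = Job.getInstance(conf, "opportunistic-maps-demo");
    // ...configure mapper, reducer, input and output paths as usual...
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}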
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java index 021863b..eed731f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMROpportunisticMaps.java @@ -68,15 +68,6 @@ public void testHalfOpportunisticMaps() throws Exception { doTest(4, 1, 1, 2); } - /** - * Test will run with 6 Maps and 2 Reducers. All the Maps are OPPORTUNISTIC. - * @throws Exception - */ - @Test - public void testMultipleReducers() throws Exception { - doTest(6, 2, 1, 6); - } - public void doTest(int numMappers, int numReducers, int numNodes, int percent) throws Exception { doTest(numMappers, numReducers, numNodes, 1000, percent); @@ -94,7 +85,8 @@ public void doTest(int numMappers, int numReducers, int numNodes, conf.setBoolean(YarnConfiguration. OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true); conf.setBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, true); - conf.setBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED, true); + conf.setInt( + YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10); dfsCluster = new MiniDFSCluster.Builder(conf) .numDataNodes(numNodes).build(); fileSystem = dfsCluster.getFileSystem(); @@ -104,11 +96,7 @@ public void doTest(int numMappers, int numReducers, int numNodes, createInput(fileSystem, numMappers, numLines); // Run the test. 
- Configuration jobConf = mrCluster.getConfig(); - jobConf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, - YarnConfiguration.DEFAULT_AMRM_PROXY_ADDRESS); - - runMergeTest(new JobConf(jobConf), fileSystem, + runMergeTest(new JobConf(conf), fileSystem, numMappers, numReducers, numLines, percent); } finally { if (dfsCluster != null) { @@ -157,7 +145,7 @@ private void runMergeTest(JobConf job, FileSystem fileSystem, int job.setNumReduceTasks(numReducers); // All OPPORTUNISTIC - job.setInt(MRJobConfig.MR_NUM_OPPORTUNISTIC_MAPS_PER_100, percent); + job.setInt(MRJobConfig.MR_NUM_OPPORTUNISTIC_MAPS_PERCENT, percent); job.setInt("mapreduce.map.maxattempts", 1); job.setInt("mapreduce.reduce.maxattempts", 1); job.setInt("mapred.test.num_lines", numLines); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index d836f0d..8962aba 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -36,7 +36,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode @@ -191,7 +191,7 @@ public void updateNodeHeartbeatResponseForContainersDecreasing( return null; } - public QueuedContainersStatus getQueuedContainersStatus() { + public OpportunisticContainersStatus getOpportunisticContainersStatus() { return null; } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index 7fd8d11..d7b159c 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -29,7 +29,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode @@ -180,7 +180,7 @@ public void updateNodeHeartbeatResponseForContainersDecreasing( return Collections.EMPTY_LIST; } - public QueuedContainersStatus getQueuedContainersStatus() { + public OpportunisticContainersStatus getOpportunisticContainersStatus() { return null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java 
index f88fa3b..0207010 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerExitStatus.java @@ -72,4 +72,10 @@ */ public static final int KILLED_AFTER_APP_COMPLETION = -107; + /** + * Container was terminated by the ContainerScheduler to make room + * for another container... + */ + public static final int KILLED_BY_CONTAINER_SCHEDULER = -108; + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java index 582389f..696fe06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.api.records; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Stable; /** @@ -36,6 +37,7 @@ /** Completed container */ COMPLETE, - /** Queued at the NM. */ - QUEUED + /** Scheduled (awaiting resources) at the NM. */ + @InterfaceStability.Unstable + SCHEDULED } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/UpdateContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/UpdateContainerRequest.java index 200dea3..e4f7a82 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/UpdateContainerRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/UpdateContainerRequest.java @@ -159,6 +159,17 @@ public int hashCode() { } @Override + public String toString() { + return "UpdateReq{" + + "containerId=" + getContainerId() + ", " + + "containerVersion=" + getContainerVersion() + ", " + + "targetExecType=" + getExecutionType() + ", " + + "targetCapability=" + getCapability() + ", " + + "updateType=" + getContainerUpdateType() + ", " + + "}"; + } + + @Override public boolean equals(Object obj) { if (this == obj) { return true; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index efa8b9d..602c190 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -297,106 +297,74 @@ public static boolean isAclEnabled(Configuration conf) { /** ACL used in case none is found. Allows nothing. */ public static final String DEFAULT_YARN_APP_ACL = " "; - /** Setting that controls whether distributed scheduling is enabled or not. 
*/ - public static final String DIST_SCHEDULING_ENABLED = - YARN_PREFIX + "distributed-scheduling.enabled"; - public static final boolean DIST_SCHEDULING_ENABLED_DEFAULT = false; - /** Setting that controls whether opportunistic container allocation * is enabled or not. */ + @Unstable public static final String OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED = - YARN_PREFIX + "opportunistic-container-allocation.enabled"; + RM_PREFIX + "opportunistic-container-allocation.enabled"; public static final boolean - OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED_DEFAULT = false; - - /** Minimum memory (in MB) used for allocating an opportunistic container. */ - public static final String OPPORTUNISTIC_CONTAINERS_MIN_MEMORY_MB = - YARN_PREFIX + "opportunistic-containers.min-memory-mb"; - public static final int OPPORTUNISTIC_CONTAINERS_MIN_MEMORY_MB_DEFAULT = 512; - - /** Minimum virtual CPU cores used for allocating an opportunistic container. - * */ - public static final String OPPORTUNISTIC_CONTAINERS_MIN_VCORES = - YARN_PREFIX + "opportunistic-containers.min-vcores"; - public static final int OPPORTUNISTIC_CONTAINERS_MIN_VCORES_DEFAULT = 1; - - /** Maximum memory (in MB) used for allocating an opportunistic container. */ - public static final String OPPORTUNISTIC_CONTAINERS_MAX_MEMORY_MB = - YARN_PREFIX + "opportunistic-containers.max-memory-mb"; - public static final int OPPORTUNISTIC_CONTAINERS_MAX_MEMORY_MB_DEFAULT = 2048; - - /** Maximum virtual CPU cores used for allocating an opportunistic container. - * */ - public static final String OPPORTUNISTIC_CONTAINERS_MAX_VCORES = - YARN_PREFIX + "opportunistic-containers.max-vcores"; - public static final int OPPORTUNISTIC_CONTAINERS_MAX_VCORES_DEFAULT = 4; - - /** Incremental memory (in MB) used for allocating an opportunistic container. - * */ - public static final String OPPORTUNISTIC_CONTAINERS_INCR_MEMORY_MB = - YARN_PREFIX + "opportunistic-containers.incr-memory-mb"; - public static final int OPPORTUNISTIC_CONTAINERS_INCR_MEMORY_MB_DEFAULT = - 512; - - /** Incremental virtual CPU cores used for allocating an opportunistic - * container. */ - public static final String OPPORTUNISTIC_CONTAINERS_INCR_VCORES = - YARN_PREFIX + "opportunistic-containers.incr-vcores"; - public static final int OPPORTUNISTIC_CONTAINERS_INCR_VCORES_DEFAULT = 1; - - /** Container token expiry for opportunistic containers. */ - public static final String OPPORTUNISTIC_CONTAINERS_TOKEN_EXPIRY_MS = - YARN_PREFIX + "opportunistic-containers.container-token-expiry-ms"; - public static final int OPPORTUNISTIC_CONTAINERS_TOKEN_EXPIRY_MS_DEFAULT = - 600000; + DEFAULT_OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED = false; /** Number of nodes to be used by the Opportunistic Container allocator for * dispatching containers during container allocation. */ + @Unstable public static final String OPP_CONTAINER_ALLOCATION_NODES_NUMBER_USED = - YARN_PREFIX + "opportunistic-container-allocation.nodes-used"; - public static final int OPP_CONTAINER_ALLOCATION_NODES_NUMBER_USED_DEFAULT = + RM_PREFIX + "opportunistic-container-allocation.nodes-used"; + public static final int DEFAULT_OPP_CONTAINER_ALLOCATION_NODES_NUMBER_USED = 10; /** Frequency for computing least loaded NMs. 
*/ + @Unstable public static final String NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS = - YARN_PREFIX + "nm-container-queuing.sorting-nodes-interval-ms"; + RM_PREFIX + "nm-container-queuing.sorting-nodes-interval-ms"; public static final long - NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS_DEFAULT = 1000; + DEFAULT_NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS = 1000; - /** Comparator for determining node load for Distributed Scheduling. */ + /** Comparator for determining node load for scheduling of opportunistic + * containers. */ + @Unstable public static final String NM_CONTAINER_QUEUING_LOAD_COMPARATOR = - YARN_PREFIX + "nm-container-queuing.load-comparator"; - public static final String NM_CONTAINER_QUEUING_LOAD_COMPARATOR_DEFAULT = + RM_PREFIX + "nm-container-queuing.load-comparator"; + public static final String DEFAULT_NM_CONTAINER_QUEUING_LOAD_COMPARATOR = "QUEUE_LENGTH"; /** Value of standard deviation used for calculation of queue limit * thresholds. */ + @Unstable public static final String NM_CONTAINER_QUEUING_LIMIT_STDEV = - YARN_PREFIX + "nm-container-queuing.queue-limit-stdev"; - public static final float NM_CONTAINER_QUEUING_LIMIT_STDEV_DEFAULT = + RM_PREFIX + "nm-container-queuing.queue-limit-stdev"; + public static final float DEFAULT_NM_CONTAINER_QUEUING_LIMIT_STDEV = 1.0f; - /** Min length of container queue at NodeManager. */ + /** Min length of container queue at NodeManager. This is a cluster-wide + * configuration that acts as the lower-bound of optimal queue length + * calculated by the NodeQueueLoadMonitor */ + @Unstable public static final String NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH = - YARN_PREFIX + "nm-container-queuing.min-queue-length"; - public static final int NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH_DEFAULT = 1; + RM_PREFIX + "nm-container-queuing.min-queue-length"; + public static final int DEFAULT_NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH = 5; - /** Max length of container queue at NodeManager. */ + /** Max length of container queue at NodeManager. This is a cluster-wide + * configuration that acts as the upper-bound of optimal queue length + * calculated by the NodeQueueLoadMonitor */ + @Unstable public static final String NM_CONTAINER_QUEUING_MAX_QUEUE_LENGTH = - YARN_PREFIX + "nm-container-queuing.max-queue-length"; - public static final int NM_CONTAINER_QUEUING_MAX_QUEUE_LENGTH_DEFAULT = 10; + RM_PREFIX + "nm-container-queuing.max-queue-length"; + public static final int DEFAULT_NM_CONTAINER_QUEUING_MAX_QUEUE_LENGTH = 15; /** Min queue wait time for a container at a NodeManager. */ + @Unstable public static final String NM_CONTAINER_QUEUING_MIN_QUEUE_WAIT_TIME_MS = - YARN_PREFIX + "nm-container-queuing.min-queue-wait-time-ms"; - public static final int NM_CONTAINER_QUEUING_MIN_QUEUE_WAIT_TIME_MS_DEFAULT = - 1; + RM_PREFIX + "nm-container-queuing.min-queue-wait-time-ms"; + public static final int DEFAULT_NM_CONTAINER_QUEUING_MIN_QUEUE_WAIT_TIME_MS = + 10; /** Max queue wait time for a container queue at a NodeManager. */ + @Unstable public static final String NM_CONTAINER_QUEUING_MAX_QUEUE_WAIT_TIME_MS = - YARN_PREFIX + "nm-container-queuing.max-queue-wait-time-ms"; - public static final int NM_CONTAINER_QUEUING_MAX_QUEUE_WAIT_TIME_MS_DEFAULT = - 10; + RM_PREFIX + "nm-container-queuing.max-queue-wait-time-ms"; + public static final int DEFAULT_NM_CONTAINER_QUEUING_MAX_QUEUE_WAIT_TIME_MS = + 100; /** * Enable/disable intermediate-data encryption at YARN level. 
For now, this @@ -802,10 +770,16 @@ public static boolean isAclEnabled(Configuration conf) { /** Prefix for all node manager configs.*/ public static final String NM_PREFIX = "yarn.nodemanager."; - /** Enable Queuing of OPPORTUNISTIC containers. */ - public static final String NM_CONTAINER_QUEUING_ENABLED = NM_PREFIX - + "container-queuing-enabled"; - public static final boolean NM_CONTAINER_QUEUING_ENABLED_DEFAULT = false; + /** Max Queue length of OPPORTUNISTIC containers on the NM. */ + public static final String NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH = + NM_PREFIX + "opportunistic-containers-max-queue-length"; + public static final int DEFAULT_NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH = + 0; + + /** Setting that controls whether distributed scheduling is enabled or not. */ + public static final String DIST_SCHEDULING_ENABLED = + NM_PREFIX + "distributed-scheduling.enabled"; + public static final boolean DEFAULT_DIST_SCHEDULING_ENABLED = false; /** Environment variables that will be sent to containers.*/ public static final String NM_ADMIN_USER_ENV = NM_PREFIX + "admin-env"; @@ -2780,14 +2754,14 @@ public static String getClusterId(Configuration conf) { public static boolean isDistSchedulingEnabled(Configuration conf) { return conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, - YarnConfiguration.DIST_SCHEDULING_ENABLED_DEFAULT); + YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); } public static boolean isOpportunisticContainerAllocationEnabled( Configuration conf) { return conf.getBoolean( YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, - YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED_DEFAULT); + YarnConfiguration.DEFAULT_OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED); } /* For debugging. mp configurations to system output as XML format. */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index c1bb07e..d2db5b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -82,7 +82,7 @@ enum ContainerStateProto { C_NEW = 1; C_RUNNING = 2; C_COMPLETE = 3; - C_QUEUED = 4; + C_SCHEDULED = 4; } message ContainerProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java index 9dbdb22..d69a73c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java @@ -104,11 +104,11 @@ public void doBefore() throws Exception { cluster = new MiniYARNCluster("testDistributedSchedulingE2E", 1, 1, 1); conf = new YarnConfiguration(); - conf.setBoolean(YarnConfiguration.AMRM_PROXY_ENABLED, true); conf.setBoolean(YarnConfiguration. 
OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true); conf.setBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, true); - conf.setBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED, true); + conf.setInt(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, + 10); cluster.init(conf); cluster.start(); yarnConf = cluster.getConfig(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java index 3640883..d211d6d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.Service.STATE; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -330,6 +331,12 @@ private void testContainerManagement(NMClientImpl nmClient, ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); ContainerLaunchContext clc = Records.newRecord(ContainerLaunchContext.class); + if (Shell.WINDOWS) { + clc.setCommands( + Arrays.asList("ping", "-n", "100", "127.0.0.1", ">nul")); + } else { + clc.setCommands(Arrays.asList("sleep", "10")); + } clc.setTokens(securityTokens); try { nmClient.startContainer(container, clc); @@ -415,7 +422,7 @@ private void testIncreaseContainerResource(Container container) try { nmClient.increaseContainerResource(container); } catch (YarnException e) { - // NM container will only be in LOCALIZED state, so expect the increase + // NM container will only be in SCHEDULED state, so expect the increase // action to fail. if (!e.getMessage().contains( "can only be changed when a container is in RUNNING state")) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java new file mode 100644 index 0000000..802c207 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java @@ -0,0 +1,469 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.client.api.impl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.service.Service; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.NMToken; +import org.apache.hadoop.yarn.api.records.NodeReport; +import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.client.ClientRMProxy; +import org.apache.hadoop.yarn.client.api.AMRMClient; +import org.apache.hadoop.yarn.client.api.NMTokenCache; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.MiniYARNCluster; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.Records; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +import static org.junit.Assert.assertEquals; + +/** + * Class that tests the allocation of OPPORTUNISTIC containers through the + * centralized ResourceManager. 
+ */ +public class TestOpportunisticContainerAllocation { + private static Configuration conf = null; + private static MiniYARNCluster yarnCluster = null; + private static YarnClient yarnClient = null; + private static List nodeReports = null; + private static ApplicationAttemptId attemptId = null; + private static int nodeCount = 3; + + private static final int ROLLING_INTERVAL_SEC = 13; + private static final long AM_EXPIRE_MS = 4000; + + private static Resource capability; + private static Priority priority; + private static Priority priority2; + private static String node; + private static String rack; + private static String[] nodes; + private static String[] racks; + private final static int DEFAULT_ITERATION = 3; + + @BeforeClass + public static void setup() throws Exception { + // start minicluster + conf = new YarnConfiguration(); + conf.setLong( + YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, + ROLLING_INTERVAL_SEC); + conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, AM_EXPIRE_MS); + conf.setInt(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, 100); + // set the minimum allocation so that resource decrease can go under 1024 + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512); + conf.setBoolean( + YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true); + conf.setInt( + YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); + yarnCluster = + new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1); + yarnCluster.init(conf); + yarnCluster.start(); + + // start rm client + yarnClient = YarnClient.createYarnClient(); + yarnClient.init(conf); + yarnClient.start(); + + // get node info + nodeReports = yarnClient.getNodeReports(NodeState.RUNNING); + + priority = Priority.newInstance(1); + priority2 = Priority.newInstance(2); + capability = Resource.newInstance(1024, 1); + + node = nodeReports.get(0).getNodeId().getHost(); + rack = nodeReports.get(0).getRackName(); + nodes = new String[]{node}; + racks = new String[]{rack}; + } + + @Before + public void startApp() throws Exception { + // submit new app + ApplicationSubmissionContext appContext = + yarnClient.createApplication().getApplicationSubmissionContext(); + ApplicationId appId = appContext.getApplicationId(); + // set the application name + appContext.setApplicationName("Test"); + // Set the priority for the application master + Priority pri = Records.newRecord(Priority.class); + pri.setPriority(0); + appContext.setPriority(pri); + // Set the queue to which this application is to be submitted in the RM + appContext.setQueue("default"); + // Set up the container launch context for the application master + ContainerLaunchContext amContainer = BuilderUtils.newContainerLaunchContext( + Collections.emptyMap(), + new HashMap(), Arrays.asList("sleep", "100"), + new HashMap(), null, + new HashMap()); + appContext.setAMContainerSpec(amContainer); + appContext.setResource(Resource.newInstance(1024, 1)); + // Create the request to send to the applications manager + SubmitApplicationRequest appRequest = + Records.newRecord(SubmitApplicationRequest.class); + appRequest.setApplicationSubmissionContext(appContext); + // Submit the application to the applications manager + yarnClient.submitApplication(appContext); + + // wait for app to start + RMAppAttempt appAttempt = null; + while (true) { + ApplicationReport appReport = yarnClient.getApplicationReport(appId); + if (appReport.getYarnApplicationState() 
== + YarnApplicationState.ACCEPTED) { + attemptId = appReport.getCurrentApplicationAttemptId(); + appAttempt = yarnCluster.getResourceManager().getRMContext().getRMApps() + .get(attemptId.getApplicationId()).getCurrentAppAttempt(); + while (true) { + if (appAttempt.getAppAttemptState() == RMAppAttemptState.LAUNCHED) { + break; + } + } + break; + } + } + // Just dig into the ResourceManager and get the AMRMToken just for the sake + // of testing. + UserGroupInformation.setLoginUser(UserGroupInformation + .createRemoteUser(UserGroupInformation.getCurrentUser().getUserName())); + + // emulate RM setup of AMRM token in credentials by adding the token + // *before* setting the token service + UserGroupInformation.getCurrentUser().addToken(appAttempt.getAMRMToken()); + appAttempt.getAMRMToken() + .setService(ClientRMProxy.getAMRMTokenService(conf)); + } + + @After + public void cancelApp() throws YarnException, IOException { + yarnClient.killApplication(attemptId.getApplicationId()); + attemptId = null; + } + + @AfterClass + public static void tearDown() { + if (yarnClient != null && + yarnClient.getServiceState() == Service.STATE.STARTED) { + yarnClient.stop(); + } + if (yarnCluster != null && + yarnCluster.getServiceState() == Service.STATE.STARTED) { + yarnCluster.stop(); + } + } + + @Test(timeout = 60000) + public void testAMRMClient() throws YarnException, IOException { + AMRMClient amClient = null; + try { + // start am rm client + amClient = AMRMClient.createAMRMClient(); + + //setting an instance NMTokenCache + amClient.setNMTokenCache(new NMTokenCache()); + //asserting we are not using the singleton instance cache + Assert.assertNotSame(NMTokenCache.getSingleton(), + amClient.getNMTokenCache()); + + amClient.init(conf); + amClient.start(); + + amClient.registerApplicationMaster("Host", 10000, ""); + + testOpportunisticAllocation( + (AMRMClientImpl) amClient); + + testAllocation((AMRMClientImpl)amClient); + + amClient + .unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, + null); + + } finally { + if (amClient != null && + amClient.getServiceState() == Service.STATE.STARTED) { + amClient.stop(); + } + } + } + + private void testAllocation( + final AMRMClientImpl amClient) + throws YarnException, IOException { + // setup container request + assertEquals(0, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, + true, null, + ExecutionTypeRequest.newInstance( + ExecutionType.OPPORTUNISTIC, true))); + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, + true, null, + ExecutionTypeRequest.newInstance( + ExecutionType.OPPORTUNISTIC, true))); + + amClient.removeContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.removeContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.removeContainerRequest( + new AMRMClient.ContainerRequest(capability, null, null, priority2, 0, + true, null, + 
ExecutionTypeRequest.newInstance( + ExecutionType.OPPORTUNISTIC, true))); + + int containersRequestedNode = amClient.getTable(0).get(priority, + node, ExecutionType.GUARANTEED, capability).remoteRequest + .getNumContainers(); + int containersRequestedRack = amClient.getTable(0).get(priority, + rack, ExecutionType.GUARANTEED, capability).remoteRequest + .getNumContainers(); + int containersRequestedAny = amClient.getTable(0).get(priority, + ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + .remoteRequest.getNumContainers(); + int oppContainersRequestedAny = + amClient.getTable(0).get(priority2, ResourceRequest.ANY, + ExecutionType.OPPORTUNISTIC, capability).remoteRequest + .getNumContainers(); + + assertEquals(2, containersRequestedNode); + assertEquals(2, containersRequestedRack); + assertEquals(2, containersRequestedAny); + assertEquals(1, oppContainersRequestedAny); + + assertEquals(4, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + + // RM should allocate container within 2 calls to allocate() + int allocatedContainerCount = 0; + int allocatedOpportContainerCount = 0; + int iterationsLeft = 10; + Set releases = new TreeSet<>(); + + amClient.getNMTokenCache().clearCache(); + Assert.assertEquals(0, + amClient.getNMTokenCache().numberOfTokensInCache()); + HashMap receivedNMTokens = new HashMap<>(); + + while (allocatedContainerCount < + containersRequestedAny + oppContainersRequestedAny + && iterationsLeft-- > 0) { + AllocateResponse allocResponse = amClient.allocate(0.1f); + assertEquals(0, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + + allocatedContainerCount += allocResponse.getAllocatedContainers() + .size(); + for (Container container : allocResponse.getAllocatedContainers()) { + if (container.getExecutionType() == ExecutionType.OPPORTUNISTIC) { + allocatedOpportContainerCount++; + } + ContainerId rejectContainerId = container.getId(); + releases.add(rejectContainerId); + } + + for (NMToken token : allocResponse.getNMTokens()) { + String nodeID = token.getNodeId().toString(); + receivedNMTokens.put(nodeID, token.getToken()); + } + + if (allocatedContainerCount < containersRequestedAny) { + // sleep to let NM's heartbeat to RM and trigger allocations + sleep(100); + } + } + + assertEquals(allocatedContainerCount, + containersRequestedAny + oppContainersRequestedAny); + assertEquals(allocatedOpportContainerCount, oppContainersRequestedAny); + for (ContainerId rejectContainerId : releases) { + amClient.releaseAssignedContainer(rejectContainerId); + } + assertEquals(3, amClient.release.size()); + assertEquals(0, amClient.ask.size()); + + // need to tell the AMRMClient that we don't need these resources anymore + amClient.removeContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.removeContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); + amClient.removeContainerRequest( + new AMRMClient.ContainerRequest(capability, nodes, racks, priority2, 0, + true, null, + ExecutionTypeRequest.newInstance( + ExecutionType.OPPORTUNISTIC, true))); + assertEquals(4, amClient.ask.size()); + + iterationsLeft = 3; + // do a few iterations to ensure RM is not going to send new containers + while (iterationsLeft-- > 0) { + // inform RM of rejection + AllocateResponse allocResponse = amClient.allocate(0.1f); + // RM did not send new containers because AM does not need any + assertEquals(0, allocResponse.getAllocatedContainers().size()); + if 
(allocResponse.getCompletedContainersStatuses().size() > 0) { + for (ContainerStatus cStatus : allocResponse + .getCompletedContainersStatuses()) { + if (releases.contains(cStatus.getContainerId())) { + assertEquals(cStatus.getState(), ContainerState.COMPLETE); + assertEquals(-100, cStatus.getExitStatus()); + releases.remove(cStatus.getContainerId()); + } + } + } + if (iterationsLeft > 0) { + // sleep to make sure NM's heartbeat + sleep(100); + } + } + assertEquals(0, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + } + + /** + * Tests allocation with requests comprising only opportunistic containers. + */ + private void testOpportunisticAllocation( + final AMRMClientImpl amClient) + throws YarnException, IOException { + // setup container request + assertEquals(0, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, null, null, priority, 0, + true, null, + ExecutionTypeRequest.newInstance( + ExecutionType.OPPORTUNISTIC, true))); + amClient.addContainerRequest( + new AMRMClient.ContainerRequest(capability, null, null, priority, 0, + true, null, + ExecutionTypeRequest.newInstance( + ExecutionType.OPPORTUNISTIC, true))); + + int oppContainersRequestedAny = + amClient.getTable(0).get(priority, ResourceRequest.ANY, + ExecutionType.OPPORTUNISTIC, capability).remoteRequest + .getNumContainers(); + + assertEquals(2, oppContainersRequestedAny); + + assertEquals(1, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + + // RM should allocate container within 2 calls to allocate() + int allocatedContainerCount = 0; + int iterationsLeft = 10; + Set releases = new TreeSet<>(); + + amClient.getNMTokenCache().clearCache(); + Assert.assertEquals(0, + amClient.getNMTokenCache().numberOfTokensInCache()); + HashMap receivedNMTokens = new HashMap<>(); + + while (allocatedContainerCount < oppContainersRequestedAny + && iterationsLeft-- > 0) { + AllocateResponse allocResponse = amClient.allocate(0.1f); + assertEquals(0, amClient.ask.size()); + assertEquals(0, amClient.release.size()); + + for (Container container : allocResponse.getAllocatedContainers()) { + allocatedContainerCount++; + ContainerId rejectContainerId = container.getId(); + releases.add(rejectContainerId); + } + + for (NMToken token : allocResponse.getNMTokens()) { + String nodeID = token.getNodeId().toString(); + receivedNMTokens.put(nodeID, token.getToken()); + } + + if (allocatedContainerCount < oppContainersRequestedAny) { + // sleep to let NM's heartbeat to RM and trigger allocations + sleep(100); + } + } + + assertEquals(1, receivedNMTokens.values().size()); + } + + private void sleep(int sleepTime) { + try { + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java index bb50671..b424839 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java @@ -282,8 +282,8 @@ public synchronized void setAllocatedContainers( final List containers) 
{ if (containers == null) return;
-    // this looks like a bug because it results in append and not set
     initLocalNewContainerList();
+    allocatedContainers.clear();
     allocatedContainers.addAll(containers);
   }
@@ -299,6 +299,7 @@ public synchronized void setUpdatedContainers(
     if (containers == null)
       return;
     initLocalUpdatedContainerList();
+    updatedContainers.clear();
     updatedContainers.addAll(containers);
   }
@@ -315,6 +316,7 @@ public synchronized void setCompletedContainersStatuses(
     if (containers == null)
       return;
     initLocalFinishedContainerList();
+    completedContainersStatuses.clear();
     completedContainersStatuses.addAll(containers);
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 79df3ba..d6ea32b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -973,10 +973,10 @@
   <property>
-    <description>Enable Queuing of OPPORTUNISTIC containers on the
+    <description>Max number of OPPORTUNISTIC containers to queue at the
     nodemanager.</description>
-    <name>yarn.nodemanager.container-queuing-enabled</name>
-    <value>false</value>
+    <name>yarn.nodemanager.opportunistic-containers-max-queue-length</name>
+    <value>0</value>
   </property>

@@ -2693,7 +2693,7 @@
   <property>
     <description>
       Setting that controls whether distributed scheduling is enabled.
     </description>
-    <name>yarn.distributed-scheduling.enabled</name>
+    <name>yarn.nodemanager.distributed-scheduling.enabled</name>
     <value>false</value>
   </property>

@@ -2702,73 +2702,16 @@
   <property>
     <description>
       Setting that controls whether opportunistic container allocation
       is enabled.
     </description>
-    <name>yarn.opportunistic-container-allocation.enabled</name>
+    <name>yarn.resourcemanager.opportunistic-container-allocation.enabled</name>
     <value>false</value>
   </property>

-  <property>
-    <description>
-      Minimum memory (in MB) used for allocating an opportunistic container.
-    </description>
-    <name>yarn.opportunistic-containers.min-memory-mb</name>
-    <value>512</value>
-  </property>
-
-  <property>
-    <description>
-      Minimum virtual CPU cores used for allocating an opportunistic container.
-    </description>
-    <name>yarn.opportunistic-containers.min-vcores</name>
-    <value>1</value>
-  </property>
-
-  <property>
-    <description>
-      Maximum memory (in MB) used for allocating an opportunistic container.
-    </description>
-    <name>yarn.opportunistic-containers.max-memory-mb</name>
-    <value>2048</value>
-  </property>
-
-  <property>
-    <description>
-      Maximum virtual CPU cores used for allocating an opportunistic container.
-    </description>
-    <name>yarn.opportunistic-containers.max-vcores</name>
-    <value>4</value>
-  </property>
-
-  <property>
-    <description>
-      Incremental memory (in MB) used for allocating an opportunistic container.
-    </description>
-    <name>yarn.opportunistic-containers.incr-memory-mb</name>
-    <value>512</value>
-  </property>
-
-  <property>
-    <description>
-      Incremental virtual CPU cores used for allocating an opportunistic
-      container.
-    </description>
-    <name>yarn.opportunistic-containers.incr-vcores</name>
-    <value>1</value>
-  </property>
-
-  <property>
-    <description>
-      Container token expiry for opportunistic containers.
-    </description>
-    <name>yarn.opportunistic-containers.container-token-expiry-ms</name>
-    <value>600000</value>
-  </property>
-
   <property>
     <description>
     Number of nodes to be used by the Opportunistic Container Allocator for
     dispatching containers during container allocation.
     </description>
-    <name>yarn.opportunistic-container-allocation.nodes-used</name>
+    <name>yarn.resourcemanager.opportunistic-container-allocation.nodes-used</name>
     <value>10</value>
   </property>

@@ -2776,7 +2719,7 @@
   <property>
     <description>Frequency for computing least loaded NMs.</description>
-    <name>yarn.nm-container-queuing.sorting-nodes-interval-ms</name>
+    <name>yarn.resourcemanager.nm-container-queuing.sorting-nodes-interval-ms</name>
     <value>1000</value>
   </property>

@@ -2784,7 +2727,7 @@
   <property>
     <description>Comparator for determining node load for Distributed
    Scheduling.</description>
-    <name>yarn.nm-container-queuing.load-comparator</name>
+    <name>yarn.resourcemanager.nm-container-queuing.load-comparator</name>
     <value>QUEUE_LENGTH</value>
   </property>

@@ -2792,7 +2735,7 @@
   <property>
     <description>Value of standard deviation used for calculation of queue
    limit thresholds.</description>
-    <name>yarn.nm-container-queuing.queue-limit-stdev</name>
+    <name>yarn.resourcemanager.nm-container-queuing.queue-limit-stdev</name>
     <value>1.0f</value>
   </property>

@@ -2800,32 +2743,32 @@
   <property>
     <description>Min length of container queue at NodeManager.</description>
-    <name>yarn.nm-container-queuing.min-queue-length</name>
-    <value>1</value>
+    <name>yarn.resourcemanager.nm-container-queuing.min-queue-length</name>
+    <value>5</value>
   </property>

   <property>
     <description>Max length of container queue at NodeManager.</description>
-    <name>yarn.nm-container-queuing.max-queue-length</name>
-    <value>10</value>
+    <name>yarn.resourcemanager.nm-container-queuing.max-queue-length</name>
+    <value>15</value>
   </property>

   <property>
     <description>Min queue wait time for a container at a
    NodeManager.</description>
-    <name>yarn.nm-container-queuing.min-queue-wait-time-ms</name>
-    <value>1</value>
+    <name>yarn.resourcemanager.nm-container-queuing.min-queue-wait-time-ms</name>
+    <value>10</value>
   </property>

   <property>
     <description>Max queue wait time for a container queue at a
    NodeManager.</description>
-    <name>yarn.nm-container-queuing.max-queue-wait-time-ms</name>
-    <value>10</value>
+    <name>yarn.resourcemanager.nm-container-queuing.max-queue-wait-time-ms</name>
+    <value>100</value>
   </property>

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/DistributedSchedulingAllocateResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/DistributedSchedulingAllocateResponse.java
index 7a40449..edc0cf8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/DistributedSchedulingAllocateResponse.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/DistributedSchedulingAllocateResponse.java
@@ -21,7 +21,6 @@
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.util.Records;

 import java.util.List;
@@ -58,9 +57,10 @@ public static DistributedSchedulingAllocateResponse newInstance(

   @Public
   @Unstable
-  public abstract void setNodesForScheduling(List<NodeId> nodesForScheduling);
+  public abstract void setNodesForScheduling(
+      List<RemoteNode> nodesForScheduling);

   @Public
   @Unstable
-  public abstract List<NodeId> getNodesForScheduling();
+  public abstract List<RemoteNode> getNodesForScheduling();
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RegisterDistributedSchedulingAMResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RegisterDistributedSchedulingAMResponse.java
index a0a0e38..f7d8df2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RegisterDistributedSchedulingAMResponse.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RegisterDistributedSchedulingAMResponse.java
@@ -21,7 +21,6 @@
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.util.Records;
@@ -99,10 +98,11 @@ public abstract void setRegisterResponse(

   @Public
   @Unstable
-  public abstract void setNodesForScheduling(List<NodeId> nodesForScheduling);
+  public abstract void setNodesForScheduling(
+      List<RemoteNode> nodesForScheduling);

   @Public
   @Unstable
-  public abstract List<NodeId> getNodesForScheduling();
+  public abstract List<RemoteNode> getNodesForScheduling();
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoteNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoteNode.java
new file mode 100644
index 0000000..e403a12
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/RemoteNode.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * This class is used to encapsulate the {@link NodeId} as well as the HTTP
+ * address that can be used to communicate with the Node.
+ */
+@Private
+@Unstable
+public abstract class RemoteNode implements Comparable<RemoteNode> {
+
+  /**
+   * Create new Instance.
+   * @param nodeId NodeId.
+   * @param httpAddress Http address.
+   * @return RemoteNode instance.
+   */
+  @Private
+  @Unstable
+  public static RemoteNode newInstance(NodeId nodeId, String httpAddress) {
+    RemoteNode remoteNode = Records.newRecord(RemoteNode.class);
+    remoteNode.setNodeId(nodeId);
+    remoteNode.setHttpAddress(httpAddress);
+    return remoteNode;
+  }
+
+  /**
+   * Get {@link NodeId}.
+   * @return NodeId.
+   */
+  @Private
+  @Unstable
+  public abstract NodeId getNodeId();
+
+  /**
+   * Set {@link NodeId}.
+   * @param nodeId NodeId.
+   */
+  @Private
+  @Unstable
+  public abstract void setNodeId(NodeId nodeId);
+
+  /**
+   * Get HTTP address.
+   * @return Http Address.
+   */
+  @Private
+  @Unstable
+  public abstract String getHttpAddress();
+
+  /**
+   * Set HTTP address.
+   * @param httpAddress HTTP address.
+   */
+  @Private
+  @Unstable
+  public abstract void setHttpAddress(String httpAddress);
+
+  /**
+   * Use the underlying {@link NodeId} comparator.
+   * @param other RemoteNode.
+   * @return Comparison.
+   */
+  @Override
+  public int compareTo(RemoteNode other) {
+    return this.getNodeId().compareTo(other.getNodeId());
+  }
+
+  @Override
+  public String toString() {
+    return "RemoteNode{" +
+        "nodeId=" + getNodeId() + ", " +
+        "httpAddress=" + getHttpAddress() + "}";
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/DistributedSchedulingAllocateResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/DistributedSchedulingAllocateResponsePBImpl.java
index 18d5073..8c48b61 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/DistributedSchedulingAllocateResponsePBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/DistributedSchedulingAllocateResponsePBImpl.java
@@ -21,12 +21,13 @@
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
-import org.apache.hadoop.yarn.proto.YarnProtos;
 import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos;
+import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RemoteNodeProto;
 import org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse;
+
+import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode;
+
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -45,7 +46,7 @@
   boolean viaProto = false;

   private AllocateResponse allocateResponse;
-  private List<NodeId> nodesForScheduling;
+  private List<RemoteNode> nodesForScheduling;

   public DistributedSchedulingAllocateResponsePBImpl() {
     builder = YarnServerCommonServiceProtos.
@@ -86,8 +87,8 @@ private synchronized void mergeLocalToProto() {
   private synchronized void mergeLocalToBuilder() {
     if (this.nodesForScheduling != null) {
       builder.clearNodesForScheduling();
-      Iterable<YarnProtos.NodeIdProto> iterable = getNodeIdProtoIterable(
-          this.nodesForScheduling);
+      Iterable<RemoteNodeProto> iterable =
+          getNodeIdProtoIterable(this.nodesForScheduling);
       builder.addAllNodesForScheduling(iterable);
     }
     if (this.allocateResponse != null) {
@@ -123,7 +124,7 @@ public AllocateResponse getAllocateResponse() {
   }

   @Override
-  public void setNodesForScheduling(List<NodeId> nodesForScheduling) {
+  public void setNodesForScheduling(List<RemoteNode> nodesForScheduling) {
     maybeInitBuilder();
     if (nodesForScheduling == null || nodesForScheduling.isEmpty()) {
       if (this.nodesForScheduling != null) {
@@ -137,7 +138,7 @@ public void setNodesForScheduling(List<NodeId> nodesForScheduling) {
   }

   @Override
-  public List<NodeId> getNodesForScheduling() {
+  public List<RemoteNode> getNodesForScheduling() {
     if (nodesForScheduling != null) {
       return nodesForScheduling;
     }
@@ -149,24 +150,25 @@ private synchronized void initLocalNodesForSchedulingList() {
     YarnServerCommonServiceProtos.
         DistributedSchedulingAllocateResponseProtoOrBuilder p =
         viaProto ? proto : builder;
-    List<YarnProtos.NodeIdProto> list = p.getNodesForSchedulingList();
+    List<YarnServerCommonServiceProtos.RemoteNodeProto> list =
+        p.getNodesForSchedulingList();
     nodesForScheduling = new ArrayList<>();
     if (list != null) {
-      for (YarnProtos.NodeIdProto t : list) {
-        nodesForScheduling.add(ProtoUtils.convertFromProtoFormat(t));
+      for (YarnServerCommonServiceProtos.RemoteNodeProto t : list) {
+        nodesForScheduling.add(new RemoteNodePBImpl(t));
       }
     }
   }

-  private synchronized Iterable<YarnProtos.NodeIdProto> getNodeIdProtoIterable(
-      final List<NodeId> nodeList) {
+  private synchronized Iterable<RemoteNodeProto> getNodeIdProtoIterable(
+      final List<RemoteNode> nodeList) {
     maybeInitBuilder();
-    return new Iterable<YarnProtos.NodeIdProto>() {
+    return new Iterable<RemoteNodeProto>() {
       @Override
-      public synchronized Iterator<YarnProtos.NodeIdProto> iterator() {
-        return new Iterator<YarnProtos.NodeIdProto>() {
+      public synchronized Iterator<RemoteNodeProto> iterator() {
+        return new Iterator<RemoteNodeProto>() {

-          Iterator<NodeId> iter = nodeList.iterator();
+          Iterator<RemoteNode> iter = nodeList.iterator();

           @Override
           public boolean hasNext() {
@@ -174,8 +176,8 @@ public boolean hasNext() {
           }

           @Override
-          public YarnProtos.NodeIdProto next() {
-            return ProtoUtils.convertToProtoFormat(iter.next());
+          public RemoteNodeProto next() {
+            return ((RemoteNodePBImpl)iter.next()).getProto();
           }

           @Override
@@ -186,5 +188,4 @@ public void remove() {
       }
     };
   }
-
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RegisterDistributedSchedulingAMResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RegisterDistributedSchedulingAMResponsePBImpl.java
index 4aaf99c..41b2a4f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RegisterDistributedSchedulingAMResponsePBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RegisterDistributedSchedulingAMResponsePBImpl.java
@@ -23,13 +23,15 @@
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RegisterApplicationMasterResponsePBImpl;
-import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
-import org.apache.hadoop.yarn.proto.YarnProtos;
 import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos;
+import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RemoteNodeProto;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterDistributedSchedulingAMResponse;
+
+import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode;
+
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -52,7 +54,7 @@
   private Resource maxContainerResource;
   private Resource minContainerResource;
   private Resource incrContainerResource;
-  private List<NodeId> nodesForScheduling;
+  private List<RemoteNode> nodesForScheduling;
   private RegisterApplicationMasterResponse registerApplicationMasterResponse;

   public RegisterDistributedSchedulingAMResponsePBImpl() {
@@ -95,8 +97,8 @@ private synchronized void mergeLocalToProto() {
   private synchronized void mergeLocalToBuilder() {
     if (this.nodesForScheduling != null) {
       builder.clearNodesForScheduling();
-      Iterable<YarnProtos.NodeIdProto> iterable = getNodeIdProtoIterable(
-          this.nodesForScheduling);
+      Iterable<RemoteNodeProto> iterable =
+          getNodeIdProtoIterable(this.nodesForScheduling);
       builder.addAllNodesForScheduling(iterable);
     }
     if (this.maxContainerResource != null) {
@@ -261,7 +263,7 @@ public long getContainerIdStart() {
   }

   @Override
-  public void setNodesForScheduling(List<NodeId> nodesForScheduling) {
+  public void setNodesForScheduling(List<RemoteNode> nodesForScheduling) {
     maybeInitBuilder();
     if (nodesForScheduling == null || nodesForScheduling.isEmpty()) {
       if (this.nodesForScheduling != null) {
@@ -275,7 +277,7 @@ public void setNodesForScheduling(List<NodeId> nodesForScheduling) {
   }

   @Override
-  public List<NodeId> getNodesForScheduling() {
+  public List<RemoteNode> getNodesForScheduling() {
     if (nodesForScheduling != null) {
       return nodesForScheduling;
     }
@@ -287,24 +289,25 @@ private synchronized void initLocalNodesForSchedulingList() {
     YarnServerCommonServiceProtos.
         RegisterDistributedSchedulingAMResponseProtoOrBuilder p =
         viaProto ? proto : builder;
-    List<YarnProtos.NodeIdProto> list = p.getNodesForSchedulingList();
+    List<YarnServerCommonServiceProtos.RemoteNodeProto> list =
+        p.getNodesForSchedulingList();
     nodesForScheduling = new ArrayList<>();
     if (list != null) {
-      for (YarnProtos.NodeIdProto t : list) {
-        nodesForScheduling.add(ProtoUtils.convertFromProtoFormat(t));
+      for (YarnServerCommonServiceProtos.RemoteNodeProto t : list) {
+        nodesForScheduling.add(new RemoteNodePBImpl(t));
       }
     }
   }

-  private synchronized Iterable<YarnProtos.NodeIdProto> getNodeIdProtoIterable(
-      final List<NodeId> nodeList) {
+  private synchronized Iterable<RemoteNodeProto> getNodeIdProtoIterable(
+      final List<RemoteNode> nodeList) {
     maybeInitBuilder();
-    return new Iterable<YarnProtos.NodeIdProto>() {
+    return new Iterable<RemoteNodeProto>() {
       @Override
-      public synchronized Iterator<YarnProtos.NodeIdProto> iterator() {
-        return new Iterator<YarnProtos.NodeIdProto>() {
+      public synchronized Iterator<RemoteNodeProto> iterator() {
+        return new Iterator<RemoteNodeProto>() {

-          Iterator<NodeId> iter = nodeList.iterator();
+          Iterator<RemoteNode> iter = nodeList.iterator();

           @Override
           public boolean hasNext() {
@@ -312,8 +315,8 @@ public boolean hasNext() {
           }

           @Override
-          public YarnProtos.NodeIdProto next() {
-            return ProtoUtils.convertToProtoFormat(iter.next());
+          public RemoteNodeProto next() {
+            return ((RemoteNodePBImpl)iter.next()).getProto();
           }

           @Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoteNodePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoteNodePBImpl.java
new file mode 100644
index 0000000..3e4fd4a
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/RemoteNodePBImpl.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; + +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; +import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RemoteNodeProto; +import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RemoteNodeProtoOrBuilder; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; + +/** + * Implementation of {@link RemoteNode}. + */ +public class RemoteNodePBImpl extends RemoteNode { + + private RemoteNodeProto proto = RemoteNodeProto.getDefaultInstance(); + private RemoteNodeProto.Builder builder = null; + private boolean viaProto = false; + + private NodeId nodeId = null; + + public RemoteNodePBImpl() { + builder = RemoteNodeProto.newBuilder(); + } + + public RemoteNodePBImpl(RemoteNodeProto proto) { + this.proto = proto; + viaProto = true; + } + + public RemoteNodeProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToBuilder() { + if (this.nodeId != null + && !((NodeIdPBImpl) nodeId).getProto().equals( + builder.getNodeId())) { + builder.setNodeId(ProtoUtils.convertToProtoFormat(this.nodeId)); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = RemoteNodeProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public NodeId getNodeId() { + RemoteNodeProtoOrBuilder p = viaProto ? proto : builder; + if (this.nodeId != null) { + return this.nodeId; + } + if (!p.hasNodeId()) { + return null; + } + this.nodeId = ProtoUtils.convertFromProtoFormat(p.getNodeId()); + return this.nodeId; + } + + @Override + public void setNodeId(NodeId nodeId) { + maybeInitBuilder(); + if (nodeId == null) { + builder.clearNodeId(); + } + this.nodeId = nodeId; + } + + @Override + public String getHttpAddress() { + RemoteNodeProtoOrBuilder p = viaProto ? 
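+    // Reads are served from the immutable proto once it has been built
+    // (viaProto == true) and from the mutable builder otherwise.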
proto : builder; + if (!p.hasHttpAddress()) { + return null; + } + return (p.getHttpAddress()); + } + + @Override + public void setHttpAddress(String httpAddress) { + maybeInitBuilder(); + if (httpAddress == null) { + builder.clearHttpAddress(); + return; + } + builder.setHttpAddress(httpAddress); + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java index 89e054b..440cd0a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java @@ -125,10 +125,11 @@ public abstract void setIncreasedContainers( @Private @Unstable - public abstract QueuedContainersStatus getQueuedContainersStatus(); + public abstract OpportunisticContainersStatus + getOpportunisticContainersStatus(); @Private @Unstable - public abstract void setQueuedContainersStatus( - QueuedContainersStatus queuedContainersStatus); + public abstract void setOpportunisticContainersStatus( + OpportunisticContainersStatus opportunisticContainersStatus); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/OpportunisticContainersStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/OpportunisticContainersStatus.java new file mode 100644 index 0000000..732db2a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/OpportunisticContainersStatus.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.util.Records; + +/** + *
<p>
+ * OpportunisticContainersStatus captures information pertaining to the
+ * state of execution of the opportunistic containers within a node.
+ * </p>
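+ *
+ * <p>A population sketch (the values below are illustrative only; in
+ * practice the NM fills in this record and ships it with the node
+ * heartbeat):</p>
+ * <pre>
+ *   OpportunisticContainersStatus status =
+ *       OpportunisticContainersStatus.newInstance();
+ *   status.setRunningOpportContainers(2);
+ *   status.setOpportMemoryUsed(2L * 1024 * 1024 * 1024); // bytes
+ *   status.setOpportCoresUsed(2);
+ *   status.setQueuedOpportContainers(1);
+ *   status.setWaitQueueLength(1);
+ *   status.setEstimatedQueueWaitTime(500); // assumed to be milliseconds
+ * </pre>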
+ */
+@Private
+@Unstable
+public abstract class OpportunisticContainersStatus {
+  public static OpportunisticContainersStatus newInstance() {
+    return Records.newRecord(OpportunisticContainersStatus.class);
+  }
+
+  /**
+   * Returns the number of currently running opportunistic containers on the
+   * node.
+   *
+   * @return number of running opportunistic containers.
+   */
+  @Private
+  @Unstable
+  public abstract int getRunningOpportContainers();
+
+  /**
+   * Sets the number of running opportunistic containers.
+   *
+   * @param runningOpportContainers number of running opportunistic containers.
+   */
+  @Private
+  @Unstable
+  public abstract void setRunningOpportContainers(int runningOpportContainers);
+
+  /**
+   * Returns memory currently used on the node for running opportunistic
+   * containers.
+   *
+   * @return memory (in bytes) used for running opportunistic containers.
+   */
+  @Private
+  @Unstable
+  public abstract long getOpportMemoryUsed();
+
+  /**
+   * Sets the memory used on the node for running opportunistic containers.
+   *
+   * @param opportMemoryUsed memory (in bytes) used for running opportunistic
+   *          containers.
+   */
+  @Private
+  @Unstable
+  public abstract void setOpportMemoryUsed(long opportMemoryUsed);
+
+  /**
+   * Returns CPU cores currently used on the node for running opportunistic
+   * containers.
+   *
+   * @return CPU cores used for running opportunistic containers.
+   */
+  @Private
+  @Unstable
+  public abstract int getOpportCoresUsed();
+
+  /**
+   * Sets the CPU cores used on the node for running opportunistic containers.
+   *
+   * @param opportCoresUsed CPU cores used for running opportunistic
+   *          containers.
+   */
+  @Private
+  @Unstable
+  public abstract void setOpportCoresUsed(int opportCoresUsed);
+
+  /**
+   * Returns the number of queued opportunistic containers on the node.
+   *
+   * @return number of queued opportunistic containers.
+   */
+  @Private
+  @Unstable
+  public abstract int getQueuedOpportContainers();
+
+  /**
+   * Sets the number of queued opportunistic containers on the node.
+   *
+   * @param queuedOpportContainers number of queued opportunistic containers.
+   */
+  @Private
+  @Unstable
+  public abstract void setQueuedOpportContainers(int queuedOpportContainers);
+
+  /**
+   * Returns the length of the containers queue on the node.
+   *
+   * @return length of the containers queue.
+   */
+  @Private
+  @Unstable
+  public abstract int getWaitQueueLength();
+
+  /**
+   * Sets the length of the containers queue on the node.
+   *
+   * @param waitQueueLength length of the containers queue.
+   */
+  @Private
+  @Unstable
+  public abstract void setWaitQueueLength(int waitQueueLength);
+
+  /**
+   * Returns the estimated time that a container will have to wait if added to
+   * the queue of the node.
+   *
+   * @return estimated queuing time.
+   */
+  @Private
+  @Unstable
+  public abstract int getEstimatedQueueWaitTime();
+
+  /**
+   * Sets the estimated time that a container will have to wait if added to the
+   * queue of the node.
+   *
+   * @param queueWaitTime estimated queuing time.
+ */ + @Private + @Unstable + public abstract void setEstimatedQueueWaitTime(int queueWaitTime); + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java index d6a1737..8aebc6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java @@ -41,9 +41,9 @@ import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.QueuedContainersStatusProto; +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.OpportunisticContainersStatusProto; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.api.records.NodeStatus; @@ -404,25 +404,25 @@ public synchronized void setIncreasedContainers( } @Override - public synchronized QueuedContainersStatus getQueuedContainersStatus() { - NodeStatusProtoOrBuilder p = - this.viaProto ? this.proto : this.builder; - if (!p.hasQueuedContainerStatus()) { + public synchronized OpportunisticContainersStatus + getOpportunisticContainersStatus() { + NodeStatusProtoOrBuilder p = this.viaProto ? 
this.proto : this.builder; + if (!p.hasOpportunisticContainersStatus()) { return null; } - return convertFromProtoFormat(p.getQueuedContainerStatus()); + return convertFromProtoFormat(p.getOpportunisticContainersStatus()); } @Override - public synchronized void setQueuedContainersStatus( - QueuedContainersStatus queuedContainersStatus) { + public synchronized void setOpportunisticContainersStatus( + OpportunisticContainersStatus opportunisticContainersStatus) { maybeInitBuilder(); - if (queuedContainersStatus == null) { - this.builder.clearQueuedContainerStatus(); + if (opportunisticContainersStatus == null) { + this.builder.clearOpportunisticContainersStatus(); return; } - this.builder.setQueuedContainerStatus( - convertToProtoFormat(queuedContainersStatus)); + this.builder.setOpportunisticContainersStatus( + convertToProtoFormat(opportunisticContainersStatus)); } private NodeIdProto convertToProtoFormat(NodeId nodeId) { @@ -468,14 +468,14 @@ private ResourceUtilizationPBImpl convertFromProtoFormat( return new ResourceUtilizationPBImpl(p); } - private QueuedContainersStatusProto convertToProtoFormat( - QueuedContainersStatus r) { - return ((QueuedContainersStatusPBImpl) r).getProto(); + private OpportunisticContainersStatusProto convertToProtoFormat( + OpportunisticContainersStatus r) { + return ((OpportunisticContainersStatusPBImpl) r).getProto(); } - private QueuedContainersStatus convertFromProtoFormat( - QueuedContainersStatusProto p) { - return new QueuedContainersStatusPBImpl(p); + private OpportunisticContainersStatus convertFromProtoFormat( + OpportunisticContainersStatusProto p) { + return new OpportunisticContainersStatusPBImpl(p); } private ContainerPBImpl convertFromProtoFormat( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/OpportunisticContainersStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/OpportunisticContainersStatusPBImpl.java new file mode 100644 index 0000000..8399713 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/OpportunisticContainersStatusPBImpl.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.records.impl.pb; + +import org.apache.hadoop.yarn.proto.YarnServerCommonProtos; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; + +/** + * Protocol Buffer implementation of OpportunisticContainersStatus. 
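+ *
+ * <p>A round-trip sketch (the field value is illustrative):</p>
+ * <pre>
+ *   OpportunisticContainersStatusPBImpl status =
+ *       new OpportunisticContainersStatusPBImpl();
+ *   status.setWaitQueueLength(3);
+ *   // Build the proto that travels inside NodeStatusProto...
+ *   OpportunisticContainersStatusProto proto = status.getProto();
+ *   // ...and reconstruct an equivalent record from it.
+ *   OpportunisticContainersStatus copy =
+ *       new OpportunisticContainersStatusPBImpl(proto);
+ *   assert copy.getWaitQueueLength() == 3;
+ * </pre>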
+ */ +public class OpportunisticContainersStatusPBImpl + extends OpportunisticContainersStatus { + + private YarnServerCommonProtos.OpportunisticContainersStatusProto proto = + YarnServerCommonProtos.OpportunisticContainersStatusProto + .getDefaultInstance(); + private YarnServerCommonProtos.OpportunisticContainersStatusProto.Builder + builder = null; + private boolean viaProto = false; + + public OpportunisticContainersStatusPBImpl() { + builder = + YarnServerCommonProtos.OpportunisticContainersStatusProto.newBuilder(); + } + + public OpportunisticContainersStatusPBImpl(YarnServerCommonProtos + .OpportunisticContainersStatusProto proto) { + this.proto = proto; + viaProto = true; + } + + public YarnServerCommonProtos.OpportunisticContainersStatusProto getProto() { + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = YarnServerCommonProtos.OpportunisticContainersStatusProto + .newBuilder(proto); + } + viaProto = false; + } + + @Override + public int getRunningOpportContainers() { + YarnServerCommonProtos.OpportunisticContainersStatusProtoOrBuilder p = + viaProto ? proto : builder; + return p.getRunningOpportContainers(); + } + + @Override + public void setRunningOpportContainers(int runningOpportContainers) { + maybeInitBuilder(); + builder.setRunningOpportContainers(runningOpportContainers); + } + + @Override + public long getOpportMemoryUsed() { + YarnServerCommonProtos.OpportunisticContainersStatusProtoOrBuilder p = + viaProto ? proto : builder; + return p.getOpportMemoryUsed(); + } + + @Override + public void setOpportMemoryUsed(long opportMemoryUsed) { + maybeInitBuilder(); + builder.setOpportMemoryUsed(opportMemoryUsed); + } + + @Override + public int getOpportCoresUsed() { + YarnServerCommonProtos.OpportunisticContainersStatusProtoOrBuilder p = + viaProto ? proto : builder; + return p.getOpportCoresUsed(); + } + + @Override + public void setOpportCoresUsed(int opportCoresUsed) { + maybeInitBuilder(); + builder.setOpportCoresUsed(opportCoresUsed); + } + + @Override + public int getQueuedOpportContainers() { + YarnServerCommonProtos.OpportunisticContainersStatusProtoOrBuilder p = + viaProto ? proto : builder; + return p.getQueuedOpportContainers(); + } + + @Override + public void setQueuedOpportContainers(int queuedOpportContainers) { + maybeInitBuilder(); + builder.setQueuedOpportContainers(queuedOpportContainers); + } + + @Override + public int getWaitQueueLength() { + YarnServerCommonProtos.OpportunisticContainersStatusProtoOrBuilder p = + viaProto ? proto : builder; + return p.getWaitQueueLength(); + } + + @Override + public void setWaitQueueLength(int waitQueueLength) { + maybeInitBuilder(); + builder.setWaitQueueLength(waitQueueLength); + } + + @Override + public int getEstimatedQueueWaitTime() { + YarnServerCommonProtos.OpportunisticContainersStatusProtoOrBuilder p = + viaProto ? 
proto : builder; + return p.getEstimatedQueueWaitTime(); + } + + @Override + public void setEstimatedQueueWaitTime(int queueWaitTime) { + maybeInitBuilder(); + builder.setEstimatedQueueWaitTime(queueWaitTime); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java deleted file mode 100644 index 16b80dd..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/QueuedContainersStatusPBImpl.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.server.api.records.impl.pb; - -import org.apache.hadoop.yarn.proto.YarnServerCommonProtos; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; - -/** - * Protocol Buffer implementation of QueuedContainersStatus. - */ -public class QueuedContainersStatusPBImpl extends QueuedContainersStatus { - - private YarnServerCommonProtos.QueuedContainersStatusProto proto = - YarnServerCommonProtos.QueuedContainersStatusProto.getDefaultInstance(); - private YarnServerCommonProtos.QueuedContainersStatusProto.Builder builder = - null; - private boolean viaProto = false; - - public QueuedContainersStatusPBImpl() { - builder = YarnServerCommonProtos.QueuedContainersStatusProto.newBuilder(); - } - - public QueuedContainersStatusPBImpl(YarnServerCommonProtos - .QueuedContainersStatusProto proto) { - this.proto = proto; - viaProto = true; - } - - public YarnServerCommonProtos.QueuedContainersStatusProto getProto() { - proto = viaProto ? proto : builder.build(); - viaProto = true; - return proto; - } - - private void maybeInitBuilder() { - if (viaProto || builder == null) { - builder = - YarnServerCommonProtos.QueuedContainersStatusProto.newBuilder(proto); - } - viaProto = false; - } - - @Override - public int getEstimatedQueueWaitTime() { - YarnServerCommonProtos.QueuedContainersStatusProtoOrBuilder p = - viaProto ? proto : builder; - return p.getEstimatedQueueWaitTime(); - } - - @Override - public void setEstimatedQueueWaitTime(int queueWaitTime) { - maybeInitBuilder(); - builder.setEstimatedQueueWaitTime(queueWaitTime); - } - - @Override - public int getWaitQueueLength() { - YarnServerCommonProtos.QueuedContainersStatusProtoOrBuilder p = - viaProto ? 
proto : builder; - return p.getWaitQueueLength(); - } - - @Override - public void setWaitQueueLength(int waitQueueLength) { - maybeInitBuilder(); - builder.setWaitQueueLength(waitQueueLength); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerAllocator.java index 9b2fd38..c1300b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerAllocator.java @@ -22,13 +22,21 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; -import org.apache.hadoop.yarn.api.records.*; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.ContainerType; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; import org.apache.hadoop.yarn.server.security.BaseContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; @@ -145,15 +153,6 @@ public void resetContainerIdCounter(long containerIdStart) { } /** - * Sets the underlying Atomic Long. To be used when implementation needs to - * share the underlying AtomicLong of an existing counter. - * @param counter AtomicLong - */ - public void setContainerIdCounter(AtomicLong counter) { - this.containerIdCounter = counter; - } - - /** * Generates a new long value. Default implementation increments the * underlying AtomicLong. Sub classes are encouraged to over-ride this * behaviour. @@ -164,12 +163,18 @@ public long generateContainerId() { } } - static class PartitionedResourceRequests { + /** + * Class that includes two lists of {@link ResourceRequest}s: one for + * GUARANTEED and one for OPPORTUNISTIC {@link ResourceRequest}s. + */ + public static class PartitionedResourceRequests { private List guaranteed = new ArrayList<>(); private List opportunistic = new ArrayList<>(); + public List getGuaranteed() { return guaranteed; } + public List getOpportunistic() { return opportunistic; } @@ -182,92 +187,81 @@ public long generateContainerId() { new DominantResourceCalculator(); private final BaseContainerTokenSecretManager tokenSecretManager; - private int webpagePort; /** * Create a new Opportunistic Container Allocator. 
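* <p>A usage sketch under assumed surroundings ({@code request} is an
* incoming AllocateRequest; the attempt id, opportunistic context, RM
* identifier and submitter are caller-supplied placeholders, not fixed
* API objects):</p>
* <pre>
*   OpportunisticContainerAllocator allocator =
*       new OpportunisticContainerAllocator(tokenSecretManager);
*   PartitionedResourceRequests asks =
*       allocator.partitionAskList(request.getAskList());
*   List&lt;Container&gt; allocated = allocator.allocateContainers(
*       request.getResourceBlacklistRequest(), asks.getOpportunistic(),
*       appAttemptId, opportContext, rmIdentifier, appSubmitter);
* </pre>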
* @param tokenSecretManager TokenSecretManager - * @param webpagePort Webpage Port */ public OpportunisticContainerAllocator( - BaseContainerTokenSecretManager tokenSecretManager, int webpagePort) { + BaseContainerTokenSecretManager tokenSecretManager) { this.tokenSecretManager = tokenSecretManager; - this.webpagePort = webpagePort; } /** - * Entry point into the Opportunistic Container Allocator. - * @param request AllocateRequest + * Allocate OPPORTUNISTIC containers. + * @param blackList Resource BlackList Request + * @param oppResourceReqs Opportunistic Resource Requests * @param applicationAttemptId ApplicationAttemptId - * @param appContext App Specific OpportunisticContainerContext + * @param opportContext App specific OpportunisticContainerContext * @param rmIdentifier RM Identifier * @param appSubmitter App Submitter * @return List of Containers. * @throws YarnException YarnException */ - public List allocateContainers( - AllocateRequest request, ApplicationAttemptId applicationAttemptId, - OpportunisticContainerContext appContext, long rmIdentifier, + public List allocateContainers(ResourceBlacklistRequest blackList, + List oppResourceReqs, + ApplicationAttemptId applicationAttemptId, + OpportunisticContainerContext opportContext, long rmIdentifier, String appSubmitter) throws YarnException { - // Partition requests into GUARANTEED and OPPORTUNISTIC reqs - PartitionedResourceRequests partitionedAsks = - partitionAskList(request.getAskList()); - - List releasedContainers = request.getReleaseList(); - int numReleasedContainers = releasedContainers.size(); - if (numReleasedContainers > 0) { - LOG.info("AttemptID: " + applicationAttemptId + " released: " - + numReleasedContainers); - appContext.getContainersAllocated().removeAll(releasedContainers); - } - // Also, update black list - ResourceBlacklistRequest rbr = request.getResourceBlacklistRequest(); - if (rbr != null) { - appContext.getBlacklist().removeAll(rbr.getBlacklistRemovals()); - appContext.getBlacklist().addAll(rbr.getBlacklistAdditions()); + // Update black list. + if (blackList != null) { + opportContext.getBlacklist().removeAll(blackList.getBlacklistRemovals()); + opportContext.getBlacklist().addAll(blackList.getBlacklistAdditions()); } - // Add OPPORTUNISTIC reqs to the outstanding reqs - appContext.addToOutstandingReqs(partitionedAsks.getOpportunistic()); + // Add OPPORTUNISTIC requests to the outstanding ones. + opportContext.addToOutstandingReqs(oppResourceReqs); + // Satisfy the outstanding OPPORTUNISTIC requests. List allocatedContainers = new ArrayList<>(); - for (Priority priority : - appContext.getOutstandingOpReqs().descendingKeySet()) { + for (SchedulerRequestKey schedulerKey : + opportContext.getOutstandingOpReqs().descendingKeySet()) { // Allocated containers : // Key = Requested Capability, - // Value = List of Containers of given Cap (The actual container size - // might be different than what is requested.. 
which is why + // Value = List of Containers of given cap (the actual container size + // might be different than what is requested, which is why // we need the requested capability (key) to match against // the outstanding reqs) Map> allocated = allocate(rmIdentifier, - appContext, priority, applicationAttemptId, appSubmitter); + opportContext, schedulerKey, applicationAttemptId, appSubmitter); for (Map.Entry> e : allocated.entrySet()) { - appContext.matchAllocationToOutstandingRequest( + opportContext.matchAllocationToOutstandingRequest( e.getKey(), e.getValue()); allocatedContainers.addAll(e.getValue()); } } - // Send all the GUARANTEED Reqs to RM - request.setAskList(partitionedAsks.getGuaranteed()); return allocatedContainers; } private Map> allocate(long rmIdentifier, - OpportunisticContainerContext appContext, Priority priority, + OpportunisticContainerContext appContext, SchedulerRequestKey schedKey, ApplicationAttemptId appAttId, String userName) throws YarnException { Map> containers = new HashMap<>(); for (ResourceRequest anyAsk : - appContext.getOutstandingOpReqs().get(priority).values()) { + appContext.getOutstandingOpReqs().get(schedKey).values()) { allocateContainersInternal(rmIdentifier, appContext.getAppParams(), appContext.getContainerIdGenerator(), appContext.getBlacklist(), appAttId, appContext.getNodeMap(), userName, containers, anyAsk); - LOG.info("Opportunistic allocation requested for [" - + "priority=" + anyAsk.getPriority() - + ", num_containers=" + anyAsk.getNumContainers() - + ", capability=" + anyAsk.getCapability() + "]" - + " allocated = " + containers.get(anyAsk.getCapability()).size()); + if (!containers.isEmpty()) { + LOG.info("Opportunistic allocation requested for [" + + "priority=" + anyAsk.getPriority() + + ", allocationRequestId=" + anyAsk.getAllocationRequestId() + + ", num_containers=" + anyAsk.getNumContainers() + + ", capability=" + anyAsk.getCapability() + "]" + + " allocated = " + containers.keySet()); + } } return containers; } @@ -275,29 +269,35 @@ public OpportunisticContainerAllocator( private void allocateContainersInternal(long rmIdentifier, AllocationParams appParams, ContainerIdGenerator idCounter, Set blacklist, ApplicationAttemptId id, - Map allNodes, String userName, + Map allNodes, String userName, Map> containers, ResourceRequest anyAsk) throws YarnException { int toAllocate = anyAsk.getNumContainers() - (containers.isEmpty() ? 0 : containers.get(anyAsk.getCapability()).size()); - List nodesForScheduling = new ArrayList<>(); - for (Entry nodeEntry : allNodes.entrySet()) { + List nodesForScheduling = new ArrayList<>(); + for (Entry nodeEntry : allNodes.entrySet()) { // Do not use blacklisted nodes for scheduling. if (blacklist.contains(nodeEntry.getKey())) { continue; } nodesForScheduling.add(nodeEntry.getValue()); } + if (nodesForScheduling.isEmpty()) { + LOG.warn("No nodes available for allocating opportunistic containers. 
[" + + "allNodes=" + allNodes + ", " + + "blacklist=" + blacklist + "]"); + return; + } int numAllocated = 0; int nextNodeToSchedule = 0; for (int numCont = 0; numCont < toAllocate; numCont++) { nextNodeToSchedule++; nextNodeToSchedule %= nodesForScheduling.size(); - NodeId nodeId = nodesForScheduling.get(nextNodeToSchedule); + RemoteNode node = nodesForScheduling.get(nextNodeToSchedule); Container container = buildContainer(rmIdentifier, appParams, idCounter, - anyAsk, id, userName, nodeId); + anyAsk, id, userName, node); List cList = containers.get(anyAsk.getCapability()); if (cList == null) { cList = new ArrayList<>(); @@ -313,7 +313,7 @@ private void allocateContainersInternal(long rmIdentifier, private Container buildContainer(long rmIdentifier, AllocationParams appParams, ContainerIdGenerator idCounter, ResourceRequest rr, ApplicationAttemptId id, String userName, - NodeId nodeId) throws YarnException { + RemoteNode node) throws YarnException { ContainerId cId = ContainerId.newContainerId(id, idCounter.generateContainerId()); @@ -324,7 +324,7 @@ private Container buildContainer(long rmIdentifier, long currTime = System.currentTimeMillis(); ContainerTokenIdentifier containerTokenIdentifier = new ContainerTokenIdentifier( - cId, 0, nodeId.getHost() + ":" + nodeId.getPort(), userName, + cId, 0, node.getNodeId().toString(), userName, capability, currTime + appParams.containerTokenExpiryInterval, tokenSecretManager.getCurrentKey().getKeyId(), rmIdentifier, rr.getPriority(), currTime, @@ -332,10 +332,10 @@ private Container buildContainer(long rmIdentifier, ExecutionType.OPPORTUNISTIC); byte[] pwd = tokenSecretManager.createPassword(containerTokenIdentifier); - Token containerToken = newContainerToken(nodeId, pwd, + Token containerToken = newContainerToken(node.getNodeId(), pwd, containerTokenIdentifier); Container container = BuilderUtils.newContainer( - cId, nodeId, nodeId.getHost() + ":" + webpagePort, + cId, node.getNodeId(), node.getHttpAddress(), capability, rr.getPriority(), containerToken, containerTokenIdentifier.getExecutionType(), rr.getAllocationRequestId()); @@ -361,8 +361,14 @@ private static Token newContainerToken(NodeId nodeId, byte[] password, return containerToken; } - private PartitionedResourceRequests partitionAskList(List - askList) { + /** + * Partitions a list of ResourceRequest to two separate lists, one for + * GUARANTEED and one for OPPORTUNISTIC ResourceRequests. 
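+ * <p>For example (a sketch; {@code newReq} stands for a hypothetical helper
+ * that builds a ResourceRequest with the given ExecutionType):</p>
+ * <pre>
+ *   List&lt;ResourceRequest&gt; asks = Arrays.asList(
+ *       newReq(ExecutionType.GUARANTEED),
+ *       newReq(ExecutionType.OPPORTUNISTIC));
+ *   PartitionedResourceRequests parts = partitionAskList(asks);
+ *   // parts.getGuaranteed() now holds the first ask and
+ *   // parts.getOpportunistic() the second.
+ * </pre>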
+ * @param askList the list of ResourceRequests to be partitioned + * @return the partitioned ResourceRequests + */ + public PartitionedResourceRequests partitionAskList( + List askList) { PartitionedResourceRequests partitionedRequests = new PartitionedResourceRequests(); for (ResourceRequest rr : askList) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerContext.java index 1b701ea..a2f9f4d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/OpportunisticContainerContext.java @@ -19,18 +19,17 @@ package org.apache.hadoop.yarn.server.scheduler; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.NMToken; -import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -48,33 +47,24 @@ private static final Logger LOG = LoggerFactory .getLogger(OpportunisticContainerContext.class); - // Currently just used to keep track of allocated containers. - // Can be used for reporting stats later. - private Set containersAllocated = new HashSet<>(); private AllocationParams appParams = new AllocationParams(); private ContainerIdGenerator containerIdGenerator = new ContainerIdGenerator(); - private Map nodeMap = new LinkedHashMap<>(); + private volatile List nodeList = new LinkedList<>(); + private final Map nodeMap = new LinkedHashMap<>(); - // Mapping of NodeId to NodeTokens. Populated either from RM response or - // generated locally if required. - private Map nodeTokens = new HashMap<>(); private final Set blacklist = new HashSet<>(); // This maintains a map of outstanding OPPORTUNISTIC Reqs. Key-ed by Priority, - // Resource Name (Host/rack/any) and capability. This mapping is required + // Resource Name (host/rack/any) and capability. This mapping is required // to match a received Container to an outstanding OPPORTUNISTIC // ResourceRequest (ask). 
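+  // Note: the map is now keyed by SchedulerRequestKey, i.e. priority plus
+  // allocationRequestId, so asks that differ only in allocationRequestId no
+  // longer collapse onto a single Priority entry.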
- private final TreeMap> + private final TreeMap> outstandingOpReqs = new TreeMap<>(); - public Set getContainersAllocated() { - return containersAllocated; - } - - public OpportunisticContainerAllocator.AllocationParams getAppParams() { + public AllocationParams getAppParams() { return appParams; } @@ -87,19 +77,37 @@ public void setContainerIdGenerator( this.containerIdGenerator = containerIdGenerator; } - public Map getNodeMap() { - return nodeMap; + public Map getNodeMap() { + return Collections.unmodifiableMap(nodeMap); + } + + public synchronized void updateNodeList(List newNodeList) { + // This is an optimization for centralized placement. The + // OppContainerAllocatorAMService has a cached list of nodes which it sets + // here. The nodeMap needs to be updated only if the backing node list is + // modified. + if (newNodeList != nodeList) { + nodeList = newNodeList; + nodeMap.clear(); + for (RemoteNode n : nodeList) { + nodeMap.put(n.getNodeId().getHost(), n); + } + } } - public Map getNodeTokens() { - return nodeTokens; + public void updateAllocationParams(Resource minResource, Resource maxResource, + Resource incrResource, int containerTokenExpiryInterval) { + appParams.setMinResource(minResource); + appParams.setMaxResource(maxResource); + appParams.setIncrementResource(incrResource); + appParams.setContainerTokenExpiryInterval(containerTokenExpiryInterval); } public Set getBlacklist() { return blacklist; } - public TreeMap> + public TreeMap> getOutstandingOpReqs() { return outstandingOpReqs; } @@ -115,7 +123,7 @@ public void setContainerIdGenerator( */ public void addToOutstandingReqs(List resourceAsks) { for (ResourceRequest request : resourceAsks) { - Priority priority = request.getPriority(); + SchedulerRequestKey schedulerKey = SchedulerRequestKey.create(request); // TODO: Extend for Node/Rack locality. 
We only handle ANY requests now if (!ResourceRequest.isAnyLocation(request.getResourceName())) { @@ -127,10 +135,10 @@ public void addToOutstandingReqs(List resourceAsks) { } Map reqMap = - outstandingOpReqs.get(priority); + outstandingOpReqs.get(schedulerKey); if (reqMap == null) { reqMap = new HashMap<>(); - outstandingOpReqs.put(priority, reqMap); + outstandingOpReqs.put(schedulerKey, reqMap); } ResourceRequest resourceRequest = reqMap.get(request.getCapability()); @@ -142,7 +150,8 @@ public void addToOutstandingReqs(List resourceAsks) { resourceRequest.getNumContainers() + request.getNumContainers()); } if (ResourceRequest.isAnyLocation(request.getResourceName())) { - LOG.info("# of outstandingOpReqs in ANY (at priority = " + priority + LOG.info("# of outstandingOpReqs in ANY (at" + + "priority = "+ schedulerKey.getPriority() + ", with capability = " + request.getCapability() + " ) : " + resourceRequest.getNumContainers()); } @@ -158,9 +167,9 @@ public void addToOutstandingReqs(List resourceAsks) { public void matchAllocationToOutstandingRequest(Resource capability, List allocatedContainers) { for (Container c : allocatedContainers) { - containersAllocated.add(c.getId()); + SchedulerRequestKey schedulerKey = SchedulerRequestKey.extractFrom(c); Map asks = - outstandingOpReqs.get(c.getPriority()); + outstandingOpReqs.get(schedulerKey); if (asks == null) { continue; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerRequestKey.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/SchedulerRequestKey.java similarity index 92% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerRequestKey.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/SchedulerRequestKey.java index 4b640ae..9b7edbe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerRequestKey.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/scheduler/SchedulerRequestKey.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.hadoop.yarn.server.resourcemanager.scheduler; +package org.apache.hadoop.yarn.server.scheduler; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Priority; @@ -53,7 +53,7 @@ public static SchedulerRequestKey extractFrom(Container container) { container.getAllocationRequestId()); } - private SchedulerRequestKey(Priority priority, long allocationRequestId) { + SchedulerRequestKey(Priority priority, long allocationRequestId) { this.priority = priority; this.allocationRequestId = allocationRequestId; } @@ -119,4 +119,12 @@ public int hashCode() { getAllocationRequestId() >>> 32)); return result; } + + @Override + public String toString() { + return "SchedulerRequestKey{" + + "priority=" + priority + + ", allocationRequestId=" + allocationRequestId + + '}'; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index c23d557..98b172d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -39,12 +39,16 @@ message NodeStatusProto { optional ResourceUtilizationProto containers_utilization = 6; optional ResourceUtilizationProto node_utilization = 7; repeated ContainerProto increased_containers = 8; - optional QueuedContainersStatusProto queued_container_status = 9; + optional OpportunisticContainersStatusProto opportunistic_containers_status = 9; } -message QueuedContainersStatusProto { - optional int32 estimated_queue_wait_time = 1; - optional int32 wait_queue_length = 2; +message OpportunisticContainersStatusProto { + optional int32 running_opport_containers = 1; + optional int64 opport_memory_used = 2; + optional int32 opport_cores_used = 3; + optional int32 queued_opport_containers = 4; + optional int32 wait_queue_length = 5; + optional int32 estimated_queue_wait_time = 6; } message MasterKeyProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto index edbb346..353b796 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto @@ -26,6 +26,11 @@ import "yarn_protos.proto"; import "yarn_server_common_protos.proto"; import "yarn_service_protos.proto"; +message RemoteNodeProto { + optional NodeIdProto node_id = 1; + optional string http_address = 2; +} + message RegisterDistributedSchedulingAMResponseProto { optional RegisterApplicationMasterResponseProto register_response = 1; optional ResourceProto max_container_resource = 2; @@ -33,12 +38,12 @@ message RegisterDistributedSchedulingAMResponseProto { optional ResourceProto incr_container_resource = 4; optional int32 container_token_expiry_interval = 5; optional int64 container_id_start = 6; - repeated NodeIdProto nodes_for_scheduling = 7; + repeated RemoteNodeProto nodes_for_scheduling = 7; } message 
DistributedSchedulingAllocateResponseProto { optional AllocateResponseProto allocate_response = 1; - repeated NodeIdProto nodes_for_scheduling = 2; + repeated RemoteNodeProto nodes_for_scheduling = 2; } message DistributedSchedulingAllocateRequestProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java index 9f4b436..b9cde24 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestProtocolRecords.java @@ -48,7 +48,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeManagerRequestPBImpl; import org.apache.hadoop.yarn.server.api.records.NodeStatus; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.util.Records; import org.junit.Assert; import org.junit.Test; @@ -146,11 +146,11 @@ public void testNodeHeartBeatRequest() throws IOException { Records.newRecord(NodeHeartbeatRequest.class); NodeStatus nodeStatus = Records.newRecord(NodeStatus.class); - QueuedContainersStatus queuedContainersStatus = Records.newRecord - (QueuedContainersStatus.class); - queuedContainersStatus.setEstimatedQueueWaitTime(123); - queuedContainersStatus.setWaitQueueLength(321); - nodeStatus.setQueuedContainersStatus(queuedContainersStatus); + OpportunisticContainersStatus opportunisticContainersStatus = + Records.newRecord(OpportunisticContainersStatus.class); + opportunisticContainersStatus.setEstimatedQueueWaitTime(123); + opportunisticContainersStatus.setWaitQueueLength(321); + nodeStatus.setOpportunisticContainersStatus(opportunisticContainersStatus); record.setNodeStatus(nodeStatus); NodeHeartbeatRequestPBImpl pb = new @@ -159,9 +159,10 @@ public void testNodeHeartBeatRequest() throws IOException { Assert.assertEquals(123, pb.getNodeStatus() - .getQueuedContainersStatus().getEstimatedQueueWaitTime()); + .getOpportunisticContainersStatus().getEstimatedQueueWaitTime()); Assert.assertEquals(321, - pb.getNodeStatus().getQueuedContainersStatus().getWaitQueueLength()); + pb.getNodeStatus().getOpportunisticContainersStatus() + .getWaitQueueLength()); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 4e8527e..6680be8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -27,12 +27,12 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; -import 
org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; + import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -46,15 +46,6 @@ public interface Context { /** - * Interface exposing methods related to the queuing of containers in the NM. - */ - interface QueuingContext { - ConcurrentMap getQueuedContainers(); - - ConcurrentMap getKilledQueuedContainers(); - } - - /** * Return the nodeId. Usable only when the ContainerManager is started. * * @return the NodeId @@ -104,13 +95,6 @@ NodeStatusUpdater getNodeStatusUpdater(); - /** - * Returns a QueuingContext that provides information about the - * number of Containers Queued as well as the number of Containers that were - * queued and killed. - */ - QueuingContext getQueuingContext(); - boolean isDistributedSchedulingEnabled(); OpportunisticContainerAllocator getContainerAllocator(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 6f779c2..dced31b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -56,14 +56,12 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing.QueuingContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.ConfigurationNodeLabelsProvider; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider; @@ -173,14 +171,8 @@ protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService dirsHandler) { - if 
(getConfig().getBoolean(YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED, - YarnConfiguration.NM_CONTAINER_QUEUING_ENABLED_DEFAULT)) { - return new QueuingContainerManagerImpl(context, exec, del, - nodeStatusUpdater, metrics, dirsHandler); - } else { - return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, - metrics, dirsHandler); - } + return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, + metrics, dirsHandler); } protected WebServer createWebServer(Context nmContext, @@ -328,9 +320,8 @@ protected void serviceInit(Configuration conf) throws Exception { addService(nodeHealthChecker); boolean isDistSchedulingEnabled = - conf.getBoolean(YarnConfiguration. - OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, - YarnConfiguration.DIST_SCHEDULING_ENABLED_DEFAULT); + conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); this.context = createNMContext(containerTokenSecretManager, nmTokenSecretManager, nmStore, isDistSchedulingEnabled, conf); @@ -367,7 +358,7 @@ protected void serviceInit(Configuration conf) throws Exception { ((NMContext) context).setQueueableContainerAllocator( new OpportunisticContainerAllocator( - context.getContainerTokenSecretManager(), webServer.getPort())); + context.getContainerTokenSecretManager())); dispatcher.register(ContainerManagerEventType.class, containerManager); dispatcher.register(NodeManagerEventType.class, this); @@ -496,7 +487,6 @@ public void run() { private OpportunisticContainerAllocator containerAllocator; - private final QueuingContext queuingContext; private ContainerExecutor executor; public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, @@ -514,7 +504,6 @@ public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, this.stateStore = stateStore; this.logAggregationReportForApps = new ConcurrentLinkedQueue< LogAggregationReport>(); - this.queuingContext = new QueuingNMContext(); this.isDistSchedulingEnabled = isDistSchedulingEnabled; this.conf = conf; } @@ -643,11 +632,6 @@ public void setNodeStatusUpdater(NodeStatusUpdater nodeStatusUpdater) { this.nodeStatusUpdater = nodeStatusUpdater; } - @Override - public QueuingContext getQueuingContext() { - return this.queuingContext; - } - public boolean isDistributedSchedulingEnabled() { return isDistSchedulingEnabled; } @@ -673,29 +657,6 @@ public void setContainerExecutor(ContainerExecutor executor) { } /** - * Class that keeps the context for containers queued at the NM. 
- */ - public static class QueuingNMContext implements Context.QueuingContext { - protected final ConcurrentMap - queuedContainers = new ConcurrentSkipListMap<>(); - - protected final ConcurrentMap - killedQueuedContainers = new ConcurrentHashMap<>(); - - @Override - public ConcurrentMap - getQueuedContainers() { - return this.queuedContainers; - } - - @Override - public ConcurrentMap - getKilledQueuedContainers() { - return this.killedQueuedContainers; - } - } - - /** * @return the node health checker */ public NodeHealthCheckerService getNodeHealthChecker() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index fd02b73..57069aa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -47,7 +47,6 @@ import org.apache.hadoop.util.VersionUtil; import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; @@ -62,7 +61,6 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.impl.pb.RecordFactoryPBImpl; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; -import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceManagerConstants; import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.api.ServerRMProxy; @@ -75,7 +73,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.UnRegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; @@ -89,7 +87,6 @@ import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils; import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; -import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.YarnVersionInfo; import com.google.common.annotations.VisibleForTesting; @@ -478,16 +475,21 @@ protected NodeStatus getNodeStatus(int responseId) throws IOException { createKeepAliveApplicationList(), nodeHealthStatus, containersUtilization, nodeUtilization, increasedContainers); - nodeStatus.setQueuedContainersStatus(getQueuedContainerStatus()); + nodeStatus.setOpportunisticContainersStatus( + getOpportunisticContainersStatus()); return nodeStatus; } - private QueuedContainersStatus getQueuedContainerStatus() { - QueuedContainersStatus status = 
QueuedContainersStatus.newInstance(); - status.setWaitQueueLength( - this.context.getQueuingContext().getQueuedContainers().size()); + /** + * Get the status of the OPPORTUNISTIC containers. + * @return the status of the OPPORTUNISTIC containers. + */ + private OpportunisticContainersStatus getOpportunisticContainersStatus() { + OpportunisticContainersStatus status = + this.context.getContainerManager().getOpportunisticContainersStatus(); return status; } + /** * Get the aggregated utilization of the containers in this node. * @return Resource utilization of all the containers. @@ -563,9 +565,6 @@ private void updateNMResource(Resource resource) { } } - // Account for all containers that got killed while they were still queued. - pendingCompletedContainers.putAll(getKilledQueuedContainerStatuses()); - containerStatuses.addAll(pendingCompletedContainers.values()); if (LOG.isDebugEnabled()) { @@ -575,43 +574,6 @@ private void updateNMResource(Resource resource) { return containerStatuses; } - /** - * Add to the container statuses the status of the containers that got killed - * while they were queued. - */ - private Map getKilledQueuedContainerStatuses() { - Map killedQueuedContainerStatuses = - new HashMap<>(); - for (Map.Entry killedQueuedContainer : - this.context.getQueuingContext(). - getKilledQueuedContainers().entrySet()) { - ContainerTokenIdentifier containerTokenId = killedQueuedContainer - .getKey(); - ContainerId containerId = containerTokenId.getContainerID(); - ContainerStatus containerStatus = BuilderUtils.newContainerStatus( - containerId, ContainerState.COMPLETE, - killedQueuedContainer.getValue(), ContainerExitStatus.ABORTED, - containerTokenId.getResource(), containerTokenId.getExecutionType()); - ApplicationId applicationId = containerId.getApplicationAttemptId() - .getApplicationId(); - if (isApplicationStopped(applicationId)) { - if (LOG.isDebugEnabled()) { - LOG.debug(applicationId + " is completing, " + " remove " - + containerId + " from NM context."); - } - this.context.getQueuingContext().getKilledQueuedContainers() - .remove(containerTokenId); - killedQueuedContainerStatuses.put(containerId, containerStatus); - } else { - if (!isContainerRecentlyStopped(containerId)) { - killedQueuedContainerStatuses.put(containerId, containerStatus); - } - } - addCompletedContainer(containerId); - } - return killedQueuedContainerStatuses; - } - private List getRunningApplications() { List runningApplications = new ArrayList(); runningApplications.addAll(this.context.getApplications().keySet()); @@ -696,17 +658,6 @@ public void removeOrTrackCompletedContainersFromContext( } } - // Remove null containers from queuing context for killed queued containers. - Iterator killedQueuedContIter = - context.getQueuingContext().getKilledQueuedContainers().keySet(). 
- iterator(); - while (killedQueuedContIter.hasNext()) { - if (removedNullContainers.contains( - killedQueuedContIter.next().getContainerID())) { - killedQueuedContIter.remove(); - } - } - if (!removedContainers.isEmpty()) { LOG.info("Removed completed containers from NM context: " + removedContainers); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java index 79882aa..dc56090 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java @@ -79,7 +79,7 @@ * to intercept and inspect messages from application master to the cluster * resource manager. It listens to messages from the application master and * creates a request intercepting pipeline instance for each application. The - * pipeline is a chain of intercepter instances that can inspect and modify the + * pipeline is a chain of interceptor instances that can inspect and modify the * request/response as needed. */ public class AMRMProxyService extends AbstractService implements diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/DefaultRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/DefaultRequestInterceptor.java index efbdfb4..22fc8f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/DefaultRequestInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/DefaultRequestInterceptor.java @@ -152,7 +152,7 @@ public AllocateResponse allocate(final AllocateRequest request) return ((DistributedSchedulingAMProtocol)rmClient) .registerApplicationMasterForDistributedScheduling(request); } else { - throw new YarnException("Distributed Scheduling is not enabled !!"); + throw new YarnException("Distributed Scheduling is not enabled."); } } @@ -174,7 +174,7 @@ public DistributedSchedulingAllocateResponse allocateForDistributedScheduling( } return allocateResponse; } else { - throw new YarnException("Distributed Scheduling is not enabled !!"); + throw new YarnException("Distributed Scheduling is not enabled."); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java index 0da02b3..066d987 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManager.java @@ -22,9 +22,12 @@ import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor .ContainersMonitor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler + .ContainerScheduler; /** * The ContainerManager is an entity that manages the life cycle of Containers. @@ -35,8 +38,12 @@ ContainersMonitor getContainersMonitor(); + OpportunisticContainersStatus getOpportunisticContainersStatus(); + void updateQueuingLimit(ContainerQueuingLimit queuingLimit); void setBlockNewContainerRequests(boolean blockNewContainerRequests); + ContainerScheduler getContainerScheduler(); + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index b4e6725..9d5246f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -88,6 +88,7 @@ import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ContainerType; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent; import org.apache.hadoop.yarn.server.nodemanager.CMgrDecreaseContainersResourceEvent; @@ -134,6 +135,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredApplicationsState; @@ -202,6 +206,7 @@ private final WriteLock writeLock; private AMRMProxyService amrmProxyService; protected boolean amrmProxyEnabled = false; + private final ContainerScheduler containerScheduler; private long waitForContainersOnShutdownMillis; @@ -225,6 +230,8 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec, 
addService(containersLauncher); this.nodeStatusUpdater = nodeStatusUpdater; + this.containerScheduler = createContainerScheduler(context); + addService(containerScheduler); // Start configurable services auxiliaryServices = new AuxServices(); @@ -242,7 +249,8 @@ public ContainerManagerImpl(Context context, ContainerExecutor exec, dispatcher.register(AuxServicesEventType.class, auxiliaryServices); dispatcher.register(ContainersMonitorEventType.class, containersMonitor); dispatcher.register(ContainersLauncherEventType.class, containersLauncher); - + dispatcher.register(ContainerSchedulerEventType.class, containerScheduler); + addService(dispatcher); ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); @@ -281,7 +289,9 @@ public void serviceInit(Configuration conf) throws Exception { protected void createAMRMProxyService(Configuration conf) { this.amrmProxyEnabled = conf.getBoolean(YarnConfiguration.AMRM_PROXY_ENABLED, - YarnConfiguration.DEFAULT_AMRM_PROXY_ENABLED); + YarnConfiguration.DEFAULT_AMRM_PROXY_ENABLED) || + conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); if (amrmProxyEnabled) { LOG.info("AMRMProxyService is enabled. " @@ -294,6 +304,14 @@ protected void createAMRMProxyService(Configuration conf) { } } + @VisibleForTesting + protected ContainerScheduler createContainerScheduler(Context cntxt) { + // Currently, this dispatcher is shared by the ContainerManager, + // all the containers and the container monitor. + // The ContainerScheduler may use its own dispatcher. + return new ContainerScheduler(cntxt, dispatcher, metrics); + } + protected ContainersMonitor createContainersMonitor(ContainerExecutor exec) { return new ContainersMonitorImpl(exec, dispatcher, this.context); } @@ -1210,10 +1228,8 @@ protected void stopContainerInternal(ContainerId containerID) } } else { context.getNMStateStore().storeContainerKilled(containerID); - dispatcher.getEventHandler().handle( - new ContainerKillEvent(containerID, - ContainerExitStatus.KILLED_BY_APPMASTER, - "Container killed by the ApplicationMaster.")); + container.sendKillEvent(ContainerExitStatus.KILLED_BY_APPMASTER, + "Container killed by the ApplicationMaster."); NMAuditLogger.logSuccess(container.getUser(), AuditConstants.STOP_CONTAINER, "ContainerManageImpl", containerID @@ -1444,8 +1460,13 @@ protected boolean isServiceStopped() { } @Override + public OpportunisticContainersStatus getOpportunisticContainersStatus() { + return this.containerScheduler.getOpportunisticContainersStatus(); + } + + @Override public void updateQueuingLimit(ContainerQueuingLimit queuingLimit) { - LOG.trace("Implementation does not support queuing of Containers !!"); + this.containerScheduler.updateQueuingLimit(queuingLimit); } @SuppressWarnings("unchecked") @@ -1606,4 +1627,9 @@ private void internalSignalToContainer(SignalContainerRequest request, LOG.info("Container " + containerId + " no longer exists"); } } + + @Override + public ContainerScheduler getContainerScheduler() { + return this.containerScheduler; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index f8a7e35..77ac357 100644 ---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -80,7 +80,13 @@ boolean isReInitializing(); + boolean isMarkedForKilling(); + boolean canRollback(); void commitUpgrade(); + + void sendLaunchEvent(); + + void sendKillEvent(int exitStatus, String description); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 6b878aa..a2e8f07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; @@ -72,7 +73,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStopMonitoringEvent; -import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredContainerState; @@ -163,6 +165,7 @@ private ReInitializationContext createContextForRollback() { private String ips; private volatile ReInitializationContext reInitContext; private volatile boolean isReInitializing = false; + private volatile boolean isMarkedForKilling = false; /** The NM-wide configuration - not specific to this container */ private final Configuration daemonConf; @@ -284,7 +287,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, // From NEW State .addTransition(ContainerState.NEW, EnumSet.of(ContainerState.LOCALIZING, - ContainerState.LOCALIZED, + ContainerState.SCHEDULED, ContainerState.LOCALIZATION_FAILED, ContainerState.DONE), ContainerEventType.INIT_CONTAINER, new RequestResourcesTransition()) @@ -296,7 +299,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher,
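The ContainerImpl transition table above and below is built with Hadoop's StateMachineFactory. As a point of reference, a minimal self-contained sketch of that pattern; the ExampleState and ExampleEventType names are invented for illustration and appear nowhere in this patch:

    import org.apache.hadoop.yarn.state.SingleArcTransition;
    import org.apache.hadoop.yarn.state.StateMachine;
    import org.apache.hadoop.yarn.state.StateMachineFactory;

    public class StateMachineSketch {
      enum ExampleState { SCHEDULED, RUNNING }
      enum ExampleEventType { LAUNCH }

      // The factory maps (currentState, eventType) to a new state plus a hook;
      // reusing the event-type enum as the event payload keeps the toy small.
      private static final StateMachineFactory<StateMachineSketch, ExampleState,
          ExampleEventType, ExampleEventType> FACTORY =
          new StateMachineFactory<StateMachineSketch, ExampleState,
              ExampleEventType, ExampleEventType>(ExampleState.SCHEDULED)
          .addTransition(ExampleState.SCHEDULED, ExampleState.RUNNING,
              ExampleEventType.LAUNCH,
              new SingleArcTransition<StateMachineSketch, ExampleEventType>() {
                @Override
                public void transition(StateMachineSketch op,
                    ExampleEventType event) {
                  // Side effects of the transition go here.
                }
              })
          .installTopology();

      private final StateMachine<ExampleState, ExampleEventType,
          ExampleEventType> stateMachine = FACTORY.make(this);

      public static void main(String[] args) {
        StateMachineSketch sketch = new StateMachineSketch();
        sketch.stateMachine.doTransition(ExampleEventType.LAUNCH,
            ExampleEventType.LAUNCH);
        System.out.println(sketch.stateMachine.getCurrentState()); // RUNNING
      }
    }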
// From LOCALIZING State .addTransition(ContainerState.LOCALIZING, - EnumSet.of(ContainerState.LOCALIZING, ContainerState.LOCALIZED), + EnumSet.of(ContainerState.LOCALIZING, ContainerState.SCHEDULED), ContainerEventType.RESOURCE_LOCALIZED, new LocalizedTransition()) .addTransition(ContainerState.LOCALIZING, ContainerState.LOCALIZATION_FAILED, @@ -307,7 +310,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.LOCALIZING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, - new KillDuringLocalizationTransition()) + new KillBeforeRunningTransition()) // From LOCALIZATION_FAILED State .addTransition(ContainerState.LOCALIZATION_FAILED, @@ -332,17 +335,18 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, ContainerState.LOCALIZATION_FAILED, ContainerEventType.RESOURCE_FAILED) - // From LOCALIZED State - .addTransition(ContainerState.LOCALIZED, ContainerState.RUNNING, + // From SCHEDULED State + .addTransition(ContainerState.SCHEDULED, ContainerState.RUNNING, ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition()) - .addTransition(ContainerState.LOCALIZED, ContainerState.EXITED_WITH_FAILURE, + .addTransition(ContainerState.SCHEDULED, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, new ExitedWithFailureTransition(true)) - .addTransition(ContainerState.LOCALIZED, ContainerState.LOCALIZED, + .addTransition(ContainerState.SCHEDULED, ContainerState.SCHEDULED, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) - .addTransition(ContainerState.LOCALIZED, ContainerState.KILLING, - ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.SCHEDULED, ContainerState.KILLING, + ContainerEventType.KILL_CONTAINER, + new KillBeforeRunningTransition()) // From RUNNING State .addTransition(ContainerState.RUNNING, @@ -351,7 +355,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, new ExitedWithSuccessTransition(true)) .addTransition(ContainerState.RUNNING, EnumSet.of(ContainerState.RELAUNCHING, - ContainerState.LOCALIZED, + ContainerState.SCHEDULED, ContainerState.EXITED_WITH_FAILURE), ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, new RetryFailureTransition()) @@ -400,7 +404,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, .addTransition(ContainerState.REINITIALIZING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) .addTransition(ContainerState.REINITIALIZING, - ContainerState.LOCALIZED, + ContainerState.SCHEDULED, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, new KilledForReInitializationTransition()) @@ -518,9 +522,11 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, case NEW: case LOCALIZING: case LOCALIZATION_FAILED: - case LOCALIZED: + case SCHEDULED: + return org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED; case RUNNING: case RELAUNCHING: + case REINITIALIZING: case EXITED_WITH_SUCCESS: case EXITED_WITH_FAILURE: case KILLING: @@ -547,7 +553,7 @@ public String getUser() { public Map> getLocalizedResources() { this.readLock.lock(); try { - if (ContainerState.LOCALIZED == getContainerState() + if (ContainerState.SCHEDULED == getContainerState() || ContainerState.RELAUNCHING == getContainerState()) { return resourceSet.getLocalizedResources(); } else { @@ -680,7 +686,15 @@ private void sendFinishedEvents() { // Inform the application @SuppressWarnings("rawtypes") EventHandler eventHandler = 
dispatcher.getEventHandler(); - eventHandler.handle(new ApplicationContainerFinishedEvent(containerId)); + + ContainerStatus containerStatus = cloneAndGetContainerStatus(); + eventHandler.handle( + new ApplicationContainerFinishedEvent( + containerStatus.getContainerId())); + + // Tell the scheduler the container is done. + eventHandler.handle(new ContainerSchedulerEvent(this, + ContainerSchedulerEventType.CONTAINER_COMPLETED)); // Remove the container from the resource-monitor eventHandler.handle(new ContainerStopMonitoringEvent(containerId)); // Tell the logService too @@ -689,7 +703,8 @@ private void sendFinishedEvents() { } @SuppressWarnings("unchecked") // dispatcher not typed - private void sendLaunchEvent() { + @Override + public void sendLaunchEvent() { ContainersLauncherEventType launcherEvent = ContainersLauncherEventType.LAUNCH_CONTAINER; if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) { @@ -702,6 +717,22 @@ private void sendLaunchEvent() { } @SuppressWarnings("unchecked") // dispatcher not typed + private void sendScheduleEvent() { + dispatcher.getEventHandler().handle( + new ContainerSchedulerEvent(this, + ContainerSchedulerEventType.SCHEDULE_CONTAINER) + ); + } + + @SuppressWarnings("unchecked") // dispatcher not typed + @Override + public void sendKillEvent(int exitStatus, String description) { + this.isMarkedForKilling = true; + dispatcher.getEventHandler().handle( + new ContainerKillEvent(containerId, exitStatus, description)); + } + + @SuppressWarnings("unchecked") // dispatcher not typed private void sendRelaunchEvent() { ContainersLauncherEventType launcherEvent = ContainersLauncherEventType.RELAUNCH_CONTAINER; @@ -772,7 +803,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { * to the ResourceLocalizationManager and enters LOCALIZING state. * * If there are no resources to localize, sends LAUNCH_CONTAINER event - * and enters LOCALIZED state directly. + * and enters SCHEDULED state directly. * * If there are any invalid resources specified, enters LOCALIZATION_FAILED * directly. @@ -838,9 +869,9 @@ public ContainerState transition(ContainerImpl container, } return ContainerState.LOCALIZING; } else { - container.sendLaunchEvent(); + container.sendScheduleEvent(); container.metrics.endInitingContainer(); - return ContainerState.LOCALIZED; + return ContainerState.SCHEDULED; } } } @@ -880,7 +911,7 @@ public ContainerState transition(ContainerImpl container, new ContainerLocalizationEvent(LocalizationEventType. CONTAINER_RESOURCES_LOCALIZED, container)); - container.sendLaunchEvent(); + container.sendScheduleEvent(); container.metrics.endInitingContainer(); // If this is a recovered container that has already launched, skip @@ -900,7 +931,7 @@ public ContainerState transition(ContainerImpl container, SharedCacheUploadEventType.UPLOAD)); } - return ContainerState.LOCALIZED; + return ContainerState.SCHEDULED; } } @@ -1090,7 +1121,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { } /** - * Transition from LOCALIZED state to RUNNING state upon receiving + * Transition from SCHEDULED state to RUNNING state upon receiving * a CONTAINER_LAUNCHED event.
*/ static class LaunchTransition extends ContainerTransition { @@ -1248,7 +1279,7 @@ public ContainerState transition(final ContainerImpl container, container.containerId.getApplicationAttemptId().getApplicationId(), container.containerId); new KilledForReInitializationTransition().transition(container, event); - return ContainerState.LOCALIZED; + return ContainerState.SCHEDULED; } else { new ExitedWithFailureTransition(true).transition(container, event); return ContainerState.EXITED_WITH_FAILURE; @@ -1330,7 +1361,7 @@ public void transition(ContainerImpl container, } /** - * Transition to LOCALIZED and wait for RE-LAUNCH + * Transition to SCHEDULED and wait for RE-LAUNCH */ static class KilledForReInitializationTransition extends ContainerTransition { @@ -1354,8 +1385,8 @@ public void transition(ContainerImpl container, container.resourceSet = container.reInitContext.mergedResourceSet(container.resourceSet); - - container.sendLaunchEvent(); + container.isMarkedForKilling = false; + container.sendScheduleEvent(); } } @@ -1383,7 +1414,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { * Transition from LOCALIZING to KILLING upon receiving * KILL_CONTAINER event. */ - static class KillDuringLocalizationTransition implements + static class KillBeforeRunningTransition implements SingleArcTransition { @Override public void transition(ContainerImpl container, ContainerEvent event) { @@ -1415,7 +1446,7 @@ public void transition(ContainerImpl container, ContainerEvent event) { /** * Transitions upon receiving KILL_CONTAINER. - * - LOCALIZED -> KILLING. + * - SCHEDULED -> KILLING. * - RUNNING -> KILLING. * - REINITIALIZING -> KILLING. */ @@ -1641,7 +1672,8 @@ public void handle(ContainerEvent event) { stateMachine.doTransition(event.getType(), event); } catch (InvalidStateTransitionException e) { LOG.warn("Can't handle this event at current state: Current: [" - + oldState + "], eventType: [" + event.getType() + "]", e); + + oldState + "], eventType: [" + event.getType() + "]," + + " container: [" + containerID + "]", e); } if (newState != null && oldState != newState) { LOG.info("Container " + containerID + " transitioned from " @@ -1700,6 +1732,11 @@ public boolean isReInitializing() { } @Override + public boolean isMarkedForKilling() { + return this.isMarkedForKilling; + } + + @Override public boolean canRollback() { return (this.reInitContext != null) && (this.reInitContext.canRollback()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java index 70de90c..91d1356 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java @@ -19,7 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; public enum ContainerState { - NEW, LOCALIZING, LOCALIZATION_FAILED, LOCALIZED, RUNNING, RELAUNCHING, + NEW, LOCALIZING, LOCALIZATION_FAILED, SCHEDULED, RUNNING, RELAUNCHING, REINITIALIZING,
EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, KILLING, CONTAINER_CLEANEDUP_AFTER_KILL, CONTAINER_RESOURCES_CLEANINGUP, DONE } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index d774030..823457f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -104,9 +104,10 @@ private final Context context; private final ContainerManagerImpl containerManager; - protected AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false); + protected AtomicBoolean containerAlreadyLaunched = new AtomicBoolean(false); protected AtomicBoolean completed = new AtomicBoolean(false); + private volatile boolean killedBeforeStart = false; private long sleepDelayBeforeSigKill = 250; private long maxKillWaitTime = 2000; @@ -401,7 +402,12 @@ protected boolean validateContainerState() { @SuppressWarnings("unchecked") protected int launchContainer(ContainerStartContext ctx) throws IOException { ContainerId containerId = container.getContainerId(); - + if (container.isMarkedForKilling()) { + LOG.info("Container " + containerId + " not launched as it has already " + + "been marked for killing"); + this.killedBeforeStart = true; + return ExitCode.TERMINATED.getExitCode(); + } // LaunchContainer is a blocking call. We are here almost means the // container is launched, so send out the event. dispatcher.getEventHandler().handle(new ContainerEvent( @@ -410,7 +416,7 @@ protected int launchContainer(ContainerStartContext ctx) throws IOException { context.getNMStateStore().storeContainerLaunched(containerId); // Check if the container is signalled to be killed. - if (!shouldLaunchContainer.compareAndSet(false, true)) { + if (!containerAlreadyLaunched.compareAndSet(false, true)) { LOG.info("Container " + containerId + " not launched as " + "cleanup already called"); return ExitCode.TERMINATED.getExitCode(); @@ -451,10 +457,14 @@ protected void handleContainerExitCode(int exitCode, Path containerLogDir) { || exitCode == ExitCode.TERMINATED.getExitCode()) { // If the process was killed, Send container_cleanedup_after_kill and // just break out of this method. - dispatcher.getEventHandler().handle( - new ContainerExitEvent(containerId, - ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode, - diagnosticInfo.toString())); + + // If the container was killed before it started, there is no need to + // send this event.
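The containerAlreadyLaunched flag above closes a race between the thread launching the container and a concurrent cleanup or signal: whichever side flips the flag first wins, and the loser backs off. Reduced to plain JDK types (the class and method names here are illustrative only, not part of the patch):

    import java.util.concurrent.atomic.AtomicBoolean;

    public class LaunchRaceSketch {
      private final AtomicBoolean containerAlreadyLaunched =
          new AtomicBoolean(false);

      // True only if this call wins the race and may start the process.
      boolean tryLaunch() {
        return containerAlreadyLaunched.compareAndSet(false, true);
      }

      // True if a process was actually launched and must be signalled; losing
      // the race here also prevents any later launch from proceeding.
      boolean cleanupFindsLaunchedProcess() {
        return !containerAlreadyLaunched.compareAndSet(false, true);
      }
    }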
+ if (!killedBeforeStart) { + dispatcher.getEventHandler().handle( + new ContainerExitEvent(containerId, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode, + diagnosticInfo.toString())); + } } else if (exitCode != 0) { handleContainerExitWithFailure(containerId, exitCode, containerLogDir, diagnosticInfo); @@ -565,7 +575,8 @@ public void cleanupContainer() throws IOException { } // launch flag will be set to true if process already launched - boolean alreadyLaunched = !shouldLaunchContainer.compareAndSet(false, true); + boolean alreadyLaunched = + !containerAlreadyLaunched.compareAndSet(false, true); if (!alreadyLaunched) { LOG.info("Container " + containerIdStr + " not launched." + " No cleanup needed to be done"); @@ -660,7 +671,8 @@ public void signalContainer(SignalContainerCommand command) LOG.info("Sending signal " + command + " to container " + containerIdStr); - boolean alreadyLaunched = !shouldLaunchContainer.compareAndSet(false, true); + boolean alreadyLaunched = + !containerAlreadyLaunched.compareAndSet(false, true); if (!alreadyLaunched) { LOG.info("Container " + containerIdStr + " not launched." + " Not sending the signal"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java index 3cd31b7..a04a23f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java @@ -39,7 +39,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext; -import org.apache.hadoop.yarn.util.ConverterUtils; + /** * This is a ContainerLaunch which has been recovered after an NM restart (for @@ -57,7 +57,7 @@ public RecoveredContainerLaunch(Context context, Configuration configuration, { super(context, configuration, dispatcher, exec, app, container, dirsHandler, containerManager); - this.shouldLaunchContainer.set(true); + this.containerAlreadyLaunched.set(true); } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java index 1069b4f..f27e8d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitor.java @@ -19,29 +19,53 @@ package 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; import org.apache.hadoop.service.Service; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.nodemanager.ResourceView; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo; public interface ContainersMonitor extends Service, EventHandler, ResourceView { - public ResourceUtilization getContainersUtilization(); + ResourceUtilization getContainersUtilization(); - ResourceUtilization getContainersAllocation(); - - boolean hasResourcesAvailable(ProcessTreeInfo pti); - - void increaseContainersAllocation(ProcessTreeInfo pti); - - void decreaseContainersAllocation(ProcessTreeInfo pti); - - void increaseResourceUtilization(ResourceUtilization resourceUtil, - ProcessTreeInfo pti); - - void decreaseResourceUtilization(ResourceUtilization resourceUtil, - ProcessTreeInfo pti); + float getVmemRatio(); void subtractNodeResourcesFromResourceUtilization( ResourceUtilization resourceUtil); + + class ContainerManagerUtils { + /** + * Utility method to add a {@link Resource} to the + * {@link ResourceUtilization}. + * @param containersMonitor Containers Monitor. + * @param resourceUtil Resource Utilization. + * @param resource Resource. + */ + public static void increaseResourceUtilization( + ContainersMonitor containersMonitor, ResourceUtilization resourceUtil, + Resource resource) { + float vCores = (float) resource.getVirtualCores() / + containersMonitor.getVCoresAllocatedForContainers(); + int vmem = (int) (resource.getMemorySize() + * containersMonitor.getVmemRatio()); + resourceUtil.addTo((int)resource.getMemorySize(), vmem, vCores); + } + + /** + * Utility method to subtract a {@link Resource} from the + * {@link ResourceUtilization}. + * @param containersMonitor Containers Monitor. + * @param resourceUtil Resource Utilization. + * @param resource Resource. + */ + public static void decreaseResourceUtilization( + ContainersMonitor containersMonitor, ResourceUtilization resourceUtil, + Resource resource) { + float vCores = (float) resource.getVirtualCores() / + containersMonitor.getVCoresAllocatedForContainers(); + int vmem = (int) (resource.getMemorySize() + * containersMonitor.getVmemRatio()); + resourceUtil.subtractFrom((int)resource.getMemorySize(), vmem, vCores); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 1db2357..d1ec000 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -82,9 +82,6 @@ private int nodeCpuPercentageForYARN; private ResourceUtilization containersUtilization; - // Tracks the aggregated allocation of the currently allocated containers - // when queuing of containers at the NMs is enabled. 
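A usage sketch for the ContainerManagerUtils helpers added to ContainersMonitor above (not part of the patch; containersMonitor stands for an existing monitor instance). Assuming the monitor reports 8 vcores allocated for containers and a vmem ratio of 2.1f, the 1024 MB, 2-vcore allocation below contributes 1024 MB pmem, (int) (1024 * 2.1f) = 2150 MB vmem and 2/8 = 0.25 vcores to the tracked utilization:

    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.api.records.ResourceUtilization;
    import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;

    static void trackAllocation(ContainersMonitor containersMonitor) {
      ResourceUtilization allocation = ResourceUtilization.newInstance(0, 0, 0.0f);
      Resource containerResource = Resource.newInstance(1024, 2);
      // Container starts: add its allocation to the aggregate.
      ContainersMonitor.ContainerManagerUtils.increaseResourceUtilization(
          containersMonitor, allocation, containerResource);
      // ... container runs and completes ...
      // Container finishes: release exactly what was added.
      ContainersMonitor.ContainerManagerUtils.decreaseResourceUtilization(
          containersMonitor, allocation, containerResource);
    }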
- private ResourceUtilization containersAllocation; private volatile boolean stopped = false; @@ -99,7 +96,6 @@ public ContainersMonitorImpl(ContainerExecutor exec, this.monitoringThread = new MonitoringThread(); this.containersUtilization = ResourceUtilization.newInstance(0, 0, 0.0f); - this.containersAllocation = ResourceUtilization.newInstance(0, 0, 0.0f); } @Override @@ -630,6 +626,8 @@ private void changeContainerResource( LOG.warn("Container " + containerId.toString() + "does not exist"); return; } + // YARN-5860: Route this through the ContainerScheduler to + // fix containerAllocation container.setResource(resource); } @@ -729,67 +727,6 @@ public void setContainersUtilization(ResourceUtilization utilization) { this.containersUtilization = utilization; } - public ResourceUtilization getContainersAllocation() { - return this.containersAllocation; - } - - /** - * @return true if there are available allocated resources for the given - * container to start. - */ - @Override - public boolean hasResourcesAvailable(ProcessTreeInfo pti) { - synchronized (this.containersAllocation) { - // Check physical memory. - if (this.containersAllocation.getPhysicalMemory() + - (int) (pti.getPmemLimit() >> 20) > - (int) (getPmemAllocatedForContainers() >> 20)) { - return false; - } - // Check virtual memory. - if (isVmemCheckEnabled() && - this.containersAllocation.getVirtualMemory() + - (int) (pti.getVmemLimit() >> 20) > - (int) (getVmemAllocatedForContainers() >> 20)) { - return false; - } - // Check CPU. - if (this.containersAllocation.getCPU() - + allocatedCpuUsage(pti) > 1.0f) { - return false; - } - } - return true; - } - - @Override - public void increaseContainersAllocation(ProcessTreeInfo pti) { - synchronized (this.containersAllocation) { - increaseResourceUtilization(this.containersAllocation, pti); - } - } - - @Override - public void decreaseContainersAllocation(ProcessTreeInfo pti) { - synchronized (this.containersAllocation) { - decreaseResourceUtilization(this.containersAllocation, pti); - } - } - - @Override - public void increaseResourceUtilization(ResourceUtilization resourceUtil, - ProcessTreeInfo pti) { - resourceUtil.addTo((int) (pti.getPmemLimit() >> 20), - (int) (pti.getVmemLimit() >> 20), allocatedCpuUsage(pti)); - } - - @Override - public void decreaseResourceUtilization(ResourceUtilization resourceUtil, - ProcessTreeInfo pti) { - resourceUtil.subtractFrom((int) (pti.getPmemLimit() >> 20), - (int) (pti.getVmemLimit() >> 20), allocatedCpuUsage(pti)); - } - @Override public void subtractNodeResourcesFromResourceUtilization( ResourceUtilization resourceUtil) { @@ -797,14 +734,9 @@ public void subtractNodeResourcesFromResourceUtilization( (int) (getVmemAllocatedForContainers() >> 20), 1.0f); } - /** - * Calculates the vCores CPU usage that is assigned to the given - * {@link ProcessTreeInfo}. In particular, it takes into account the number of - * vCores that are allowed to be used by the NM and returns the CPU usage - * as a normalized value between {@literal >=} 0 and {@literal <=} 1. 
- */ - private float allocatedCpuUsage(ProcessTreeInfo pti) { - return (float) pti.getCpuVcores() / getVCoresAllocatedForContainers(); + @Override + public float getVmemRatio() { + return vmemRatio; } @Override @@ -875,5 +807,4 @@ protected void onStartMonitoringContainer( startEvent.getVmemLimit(), startEvent.getPmemLimit(), startEvent.getCpuVcores())); } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java deleted file mode 100644 index e8f14f1..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/QueuingContainerManagerImpl.java +++ /dev/null @@ -1,615 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.ConcurrentMap; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; -import org.apache.hadoop.yarn.api.records.ContainerExitStatus; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerStatus; -import org.apache.hadoop.yarn.api.records.ExecutionType; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.ResourceUtilization; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; -import org.apache.hadoop.yarn.security.NMTokenIdentifier; -import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; -import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; -import org.apache.hadoop.yarn.server.nodemanager.Context; -import org.apache.hadoop.yarn.server.nodemanager.DeletionService; -import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; -import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo; -import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; -import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.annotations.VisibleForTesting; - -/** - * Class extending {@link ContainerManagerImpl} and is used when queuing at the - * NM is enabled. 
- */ -public class QueuingContainerManagerImpl extends ContainerManagerImpl { - - private static final Logger LOG = LoggerFactory - .getLogger(QueuingContainerManagerImpl.class); - - private ConcurrentMap - allocatedGuaranteedContainers; - private ConcurrentMap - allocatedOpportunisticContainers; - - private Queue queuedGuaranteedContainers; - private Queue queuedOpportunisticContainers; - - private Set opportunisticContainersToKill; - private final ContainerQueuingLimit queuingLimit; - - public QueuingContainerManagerImpl(Context context, ContainerExecutor exec, - DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater, - NodeManagerMetrics metrics, LocalDirsHandlerService dirsHandler) { - super(context, exec, deletionContext, nodeStatusUpdater, metrics, - dirsHandler); - this.allocatedGuaranteedContainers = new ConcurrentHashMap<>(); - this.allocatedOpportunisticContainers = new ConcurrentHashMap<>(); - this.queuedGuaranteedContainers = new ConcurrentLinkedQueue<>(); - this.queuedOpportunisticContainers = new ConcurrentLinkedQueue<>(); - this.opportunisticContainersToKill = Collections.synchronizedSet( - new HashSet()); - this.queuingLimit = ContainerQueuingLimit.newInstance(); - } - - @Override - protected EventHandler createApplicationEventDispatcher() { - return new QueuingApplicationEventDispatcher( - super.createApplicationEventDispatcher()); - } - - @Override - protected void startContainerInternal( - ContainerTokenIdentifier containerTokenIdentifier, - StartContainerRequest request) throws YarnException, IOException { - this.context.getQueuingContext().getQueuedContainers().put( - containerTokenIdentifier.getContainerID(), containerTokenIdentifier); - - AllocatedContainerInfo allocatedContInfo = new AllocatedContainerInfo( - containerTokenIdentifier, request, - containerTokenIdentifier.getExecutionType(), containerTokenIdentifier - .getResource(), getConfig()); - - // If there are already free resources for the container to start, and - // there are no queued containers waiting to be executed, start this - // container immediately. - if (queuedGuaranteedContainers.isEmpty() && - queuedOpportunisticContainers.isEmpty() && - getContainersMonitor(). - hasResourcesAvailable(allocatedContInfo.getPti())) { - startAllocatedContainer(allocatedContInfo); - } else { - ContainerId cIdToStart = containerTokenIdentifier.getContainerID(); - LOG.info("No available resources for container {} to start its execution " - + "immediately.", cIdToStart); - if (allocatedContInfo.getExecutionType() == ExecutionType.GUARANTEED) { - queuedGuaranteedContainers.add(allocatedContInfo); - // Kill running opportunistic containers to make space for - // guaranteed container. - killOpportunisticContainers(allocatedContInfo); - } else { - LOG.info("Opportunistic container {} will be queued at the NM.", - cIdToStart); - queuedOpportunisticContainers.add(allocatedContInfo); - } - } - } - - @Override - protected void stopContainerInternal(ContainerId containerID) - throws YarnException, IOException { - Container container = this.context.getContainers().get(containerID); - // If container is null and distributed scheduling is enabled, container - // might be queued. Otherwise, container might not be handled by this NM. 
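Distilled from the removed startContainerInternal above (stand-in names and unsynchronized collections, for brevity), the queue-or-start policy of the deleted class was: start immediately only when both queues are empty and the allocation fits, queue guaranteed work and preempt opportunistic containers for it, and let opportunistic work wait.

    import java.util.ArrayDeque;
    import java.util.Queue;
    import java.util.function.Predicate;

    public class QueueOrStartSketch<C> {
      private final Queue<C> queuedGuaranteed = new ArrayDeque<>();
      private final Queue<C> queuedOpportunistic = new ArrayDeque<>();
      private final Predicate<C> fitsInFreeResources;

      QueueOrStartSketch(Predicate<C> fitsInFreeResources) {
        this.fitsInFreeResources = fitsInFreeResources;
      }

      void onStartRequest(C container, boolean guaranteed) {
        if (queuedGuaranteed.isEmpty() && queuedOpportunistic.isEmpty()
            && fitsInFreeResources.test(container)) {
          start(container);                          // run right away
        } else if (guaranteed) {
          queuedGuaranteed.add(container);           // queue, then preempt
          killOpportunisticContainersFor(container);
        } else {
          queuedOpportunistic.add(container);        // wait for free resources
        }
      }

      void start(C container) { /* launch the container */ }
      void killOpportunisticContainersFor(C container) { /* free resources */ }
    }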
- if (container == null && this.context.getQueuingContext() - .getQueuedContainers().containsKey(containerID)) { - ContainerTokenIdentifier containerTokenId = this.context - .getQueuingContext().getQueuedContainers().remove(containerID); - - boolean foundInQueue = removeQueuedContainer(containerID, - containerTokenId.getExecutionType()); - - if (foundInQueue) { - LOG.info("Removing queued container with ID " + containerID); - this.context.getQueuingContext().getKilledQueuedContainers().put( - containerTokenId, - "Queued container request removed by ApplicationMaster."); - } else { - // The container started execution in the meanwhile. - try { - stopContainerInternalIfRunning(containerID); - } catch (YarnException | IOException e) { - LOG.error("Container did not get removed successfully.", e); - } - } - - nodeStatusUpdater.sendOutofBandHeartBeat(); - } else { - super.stopContainerInternal(containerID); - } - } - - /** - * Start the execution of the given container. Also add it to the allocated - * containers, and update allocated resource utilization. - */ - private void startAllocatedContainer( - AllocatedContainerInfo allocatedContainerInfo) { - ProcessTreeInfo pti = allocatedContainerInfo.getPti(); - - if (allocatedContainerInfo.getExecutionType() == - ExecutionType.GUARANTEED) { - allocatedGuaranteedContainers.put(pti.getContainerId(), - allocatedContainerInfo); - } else { - allocatedOpportunisticContainers.put(pti.getContainerId(), - allocatedContainerInfo); - } - - getContainersMonitor().increaseContainersAllocation(pti); - - // Start execution of container. - ContainerId containerId = allocatedContainerInfo - .getContainerTokenIdentifier().getContainerID(); - this.context.getQueuingContext().getQueuedContainers().remove(containerId); - try { - LOG.info("Starting container [" + containerId + "]"); - super.startContainerInternal( - allocatedContainerInfo.getContainerTokenIdentifier(), - allocatedContainerInfo.getStartRequest()); - } catch (YarnException | IOException e) { - containerFailedToStart(pti.getContainerId(), - allocatedContainerInfo.getContainerTokenIdentifier()); - LOG.error("Container failed to start.", e); - } - } - - private void containerFailedToStart(ContainerId containerId, - ContainerTokenIdentifier containerTokenId) { - this.context.getQueuingContext().getQueuedContainers().remove(containerId); - - removeAllocatedContainer(containerId); - - this.context.getQueuingContext().getKilledQueuedContainers().put( - containerTokenId, - "Container removed from queue as it failed to start."); - } - - /** - * Remove the given container from the container queues. - * - * @return true if the container was found in one of the queues. - */ - private boolean removeQueuedContainer(ContainerId containerId, - ExecutionType executionType) { - Queue queue = - (executionType == ExecutionType.GUARANTEED) ? - queuedGuaranteedContainers : queuedOpportunisticContainers; - - boolean foundInQueue = false; - Iterator iter = queue.iterator(); - while (iter.hasNext() && !foundInQueue) { - if (iter.next().getPti().getContainerId().equals(containerId)) { - iter.remove(); - foundInQueue = true; - } - } - - return foundInQueue; - } - - /** - * Remove the given container from the allocated containers, and update - * allocated container utilization accordingly. 
- */ - private void removeAllocatedContainer(ContainerId containerId) { - AllocatedContainerInfo contToRemove = null; - - contToRemove = allocatedGuaranteedContainers.remove(containerId); - - if (contToRemove == null) { - contToRemove = allocatedOpportunisticContainers.remove(containerId); - } - - // If container was indeed running, update allocated resource utilization. - if (contToRemove != null) { - getContainersMonitor().decreaseContainersAllocation(contToRemove - .getPti()); - } - } - - /** - * Stop a container only if it is currently running. If queued, do not stop - * it. - */ - private void stopContainerInternalIfRunning(ContainerId containerID) - throws YarnException, IOException { - if (this.context.getContainers().containsKey(containerID)) { - stopContainerInternal(containerID); - } - } - - /** - * Kill opportunistic containers to free up resources for running the given - * container. - * - * @param allocatedContInfo - * the container whose execution needs to start by freeing up - * resources occupied by opportunistic containers. - */ - private void killOpportunisticContainers( - AllocatedContainerInfo allocatedContInfo) { - ContainerId containerToStartId = allocatedContInfo.getPti() - .getContainerId(); - List extraOpportContainersToKill = - pickOpportunisticContainersToKill(containerToStartId); - - // Kill the opportunistic containers that were chosen. - for (ContainerId contIdToKill : extraOpportContainersToKill) { - try { - stopContainerInternalIfRunning(contIdToKill); - } catch (YarnException | IOException e) { - LOG.error("Container did not get removed successfully.", e); - } - LOG.info( - "Opportunistic container {} will be killed in order to start the " - + "execution of guaranteed container {}.", - contIdToKill, containerToStartId); - } - } - - /** - * Choose the opportunistic containers to kill in order to free up resources - * for running the given container. - * - * @param containerToStartId - * the container whose execution needs to start by freeing up - * resources occupied by opportunistic containers. - * @return the additional opportunistic containers that need to be killed. - */ - protected List pickOpportunisticContainersToKill( - ContainerId containerToStartId) { - // The additional opportunistic containers that need to be killed for the - // given container to start. - List extraOpportContainersToKill = new ArrayList<>(); - // Track resources that need to be freed. - ResourceUtilization resourcesToFreeUp = resourcesToFreeUp( - containerToStartId); - - // Go over the running opportunistic containers. Avoid containers that have - // already been marked for killing. - boolean hasSufficientResources = false; - for (Map.Entry runningOpportCont : - allocatedOpportunisticContainers.entrySet()) { - ContainerId runningOpportContId = runningOpportCont.getKey(); - - // If there are sufficient resources to execute the given container, do - // not kill more opportunistic containers. 
- if (resourcesToFreeUp.getPhysicalMemory() <= 0 && - resourcesToFreeUp.getVirtualMemory() <= 0 && - resourcesToFreeUp.getCPU() <= 0.0f) { - hasSufficientResources = true; - break; - } - - if (!opportunisticContainersToKill.contains(runningOpportContId)) { - extraOpportContainersToKill.add(runningOpportContId); - opportunisticContainersToKill.add(runningOpportContId); - getContainersMonitor().decreaseResourceUtilization(resourcesToFreeUp, - runningOpportCont.getValue().getPti()); - } - } - - if (!hasSufficientResources) { - LOG.info( - "There are no sufficient resources to start guaranteed {} even after " - + "attempting to kill any running opportunistic containers.", - containerToStartId); - } - - return extraOpportContainersToKill; - } - - /** - * Calculates the amount of resources that need to be freed up (by killing - * opportunistic containers) in order for the given guaranteed container to - * start its execution. Resource allocation to be freed up = - * containersAllocation - - * allocation of opportunisticContainersToKill + - * allocation of queuedGuaranteedContainers that will start - * before the given container + - * allocation of given container - - * total resources of node. - * - * @param containerToStartId - * the ContainerId of the guaranteed container for which we need to - * free resources, so that its execution can start. - * @return the resources that need to be freed up for the given guaranteed - * container to start. - */ - private ResourceUtilization resourcesToFreeUp( - ContainerId containerToStartId) { - // Get allocation of currently allocated containers. - ResourceUtilization resourceAllocationToFreeUp = ResourceUtilization - .newInstance(getContainersMonitor().getContainersAllocation()); - - // Subtract from the allocation the allocation of the opportunistic - // containers that are marked for killing. - for (ContainerId opportContId : opportunisticContainersToKill) { - if (allocatedOpportunisticContainers.containsKey(opportContId)) { - getContainersMonitor().decreaseResourceUtilization( - resourceAllocationToFreeUp, - allocatedOpportunisticContainers.get(opportContId).getPti()); - } - } - // Add to the allocation the allocation of the pending guaranteed - // containers that will start before the current container will be started. - for (AllocatedContainerInfo guarContInfo : queuedGuaranteedContainers) { - getContainersMonitor().increaseResourceUtilization( - resourceAllocationToFreeUp, guarContInfo.getPti()); - if (guarContInfo.getPti().getContainerId().equals(containerToStartId)) { - break; - } - } - // Subtract the overall node resources. - getContainersMonitor().subtractNodeResourcesFromResourceUtilization( - resourceAllocationToFreeUp); - return resourceAllocationToFreeUp; - } - - /** - * If there are available resources, try to start as many pending containers - * as possible. - */ - private void startPendingContainers() { - // Start pending guaranteed containers, if resources available. - boolean resourcesAvailable = - startContainersFromQueue(queuedGuaranteedContainers); - - // Start opportunistic containers, if resources available. 
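To make the resourcesToFreeUp formula above concrete, with invented numbers: on a node with 8192 MB available to containers, a current aggregate allocation of 7168 MB, 1024 MB of opportunistic containers already marked for killing, and 3072 MB of queued guaranteed containers up to and including the one to start, the amount still to free is 7168 - 1024 + 3072 - 8192 = 1024 MB, so running opportunistic containers covering a further 1024 MB must be selected; a result of zero or less would mean nothing more needs to be killed.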
- if (resourcesAvailable) { - startContainersFromQueue(queuedOpportunisticContainers); - } - } - - private boolean startContainersFromQueue( - Queue queuedContainers) { - Iterator guarIter = queuedContainers.iterator(); - boolean resourcesAvailable = true; - - while (guarIter.hasNext() && resourcesAvailable) { - AllocatedContainerInfo allocatedContInfo = guarIter.next(); - - if (getContainersMonitor().hasResourcesAvailable( - allocatedContInfo.getPti())) { - startAllocatedContainer(allocatedContInfo); - guarIter.remove(); - } else { - resourcesAvailable = false; - } - } - return resourcesAvailable; - } - - @Override - protected ContainerStatus getContainerStatusInternal(ContainerId containerID, - NMTokenIdentifier nmTokenIdentifier) throws YarnException { - Container container = this.context.getContainers().get(containerID); - if (container == null) { - ContainerTokenIdentifier containerTokenId = this.context - .getQueuingContext().getQueuedContainers().get(containerID); - if (containerTokenId != null) { - ExecutionType executionType = this.context.getQueuingContext() - .getQueuedContainers().get(containerID).getExecutionType(); - return BuilderUtils.newContainerStatus(containerID, - org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, "", - ContainerExitStatus.INVALID, this.context.getQueuingContext() - .getQueuedContainers().get(containerID).getResource(), - executionType); - } else { - // Check if part of the stopped/killed queued containers. - for (ContainerTokenIdentifier cTokenId : this.context - .getQueuingContext().getKilledQueuedContainers().keySet()) { - if (cTokenId.getContainerID().equals(containerID)) { - return BuilderUtils.newContainerStatus(containerID, - org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE, - this.context.getQueuingContext().getKilledQueuedContainers() - .get(cTokenId), ContainerExitStatus.ABORTED, cTokenId - .getResource(), cTokenId.getExecutionType()); - } - } - } - } - return super.getContainerStatusInternal(containerID, nmTokenIdentifier); - } - - @VisibleForTesting - public int getNumAllocatedGuaranteedContainers() { - return allocatedGuaranteedContainers.size(); - } - - @VisibleForTesting - public int getNumAllocatedOpportunisticContainers() { - return allocatedOpportunisticContainers.size(); - } - - @VisibleForTesting - public int getNumQueuedGuaranteedContainers() { - return queuedGuaranteedContainers.size(); - } - - @VisibleForTesting - public int getNumQueuedOpportunisticContainers() { - return queuedOpportunisticContainers.size(); - } - - class QueuingApplicationEventDispatcher implements - EventHandler { - private EventHandler applicationEventDispatcher; - - public QueuingApplicationEventDispatcher( - EventHandler applicationEventDispatcher) { - this.applicationEventDispatcher = applicationEventDispatcher; - } - - @Override - public void handle(ApplicationEvent event) { - if (event.getType() == - ApplicationEventType.APPLICATION_CONTAINER_FINISHED) { - if (!(event instanceof ApplicationContainerFinishedEvent)) { - throw new RuntimeException("Unexpected event type: " + event); - } - ApplicationContainerFinishedEvent finishEvent = - (ApplicationContainerFinishedEvent) event; - // Remove finished container from the allocated containers, and - // attempt to start new containers. 
- ContainerId contIdToRemove = finishEvent.getContainerID(); - removeAllocatedContainer(contIdToRemove); - opportunisticContainersToKill.remove(contIdToRemove); - startPendingContainers(); - } - this.applicationEventDispatcher.handle(event); - } - } - - @Override - public void updateQueuingLimit(ContainerQueuingLimit limit) { - this.queuingLimit.setMaxQueueLength(limit.getMaxQueueLength()); - // TODO: Include wait time as well once it is implemented - if (this.queuingLimit.getMaxQueueLength() > -1) { - shedQueuedOpportunisticContainers(); - } - } - - private void shedQueuedOpportunisticContainers() { - int numAllowed = this.queuingLimit.getMaxQueueLength(); - Iterator containerIter = - queuedOpportunisticContainers.iterator(); - while (containerIter.hasNext()) { - AllocatedContainerInfo cInfo = containerIter.next(); - if (numAllowed <= 0) { - containerIter.remove(); - ContainerTokenIdentifier containerTokenIdentifier = this.context - .getQueuingContext().getQueuedContainers().remove( - cInfo.getContainerTokenIdentifier().getContainerID()); - // The Container might have already started while we were - // iterating.. - if (containerTokenIdentifier != null) { - this.context.getQueuingContext().getKilledQueuedContainers() - .putIfAbsent(cInfo.getContainerTokenIdentifier(), - "Container de-queued to meet NM queuing limits. " - + "Max Queue length[" - + this.queuingLimit.getMaxQueueLength() + "]"); - } - } - numAllowed--; - } - } - - - static class AllocatedContainerInfo { - private final ContainerTokenIdentifier containerTokenIdentifier; - private final StartContainerRequest startRequest; - private final ExecutionType executionType; - private final ProcessTreeInfo pti; - - AllocatedContainerInfo(ContainerTokenIdentifier containerTokenIdentifier, - StartContainerRequest startRequest, ExecutionType executionType, - Resource resource, Configuration conf) { - this.containerTokenIdentifier = containerTokenIdentifier; - this.startRequest = startRequest; - this.executionType = executionType; - this.pti = createProcessTreeInfo(containerTokenIdentifier - .getContainerID(), resource, conf); - } - - private ContainerTokenIdentifier getContainerTokenIdentifier() { - return this.containerTokenIdentifier; - } - - private StartContainerRequest getStartRequest() { - return this.startRequest; - } - - private ExecutionType getExecutionType() { - return this.executionType; - } - - protected ProcessTreeInfo getPti() { - return this.pti; - } - - private ProcessTreeInfo createProcessTreeInfo(ContainerId containerId, - Resource resource, Configuration conf) { - long pmemBytes = resource.getMemorySize() * 1024 * 1024L; - float pmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, - YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); - long vmemBytes = (long) (pmemRatio * pmemBytes); - int cpuVcores = resource.getVirtualCores(); - - return new ProcessTreeInfo(containerId, null, null, vmemBytes, pmemBytes, - cpuVcores); - } - - @Override - public boolean equals(Object obj) { - boolean equal = false; - if (obj instanceof AllocatedContainerInfo) { - AllocatedContainerInfo otherContInfo = (AllocatedContainerInfo) obj; - equal = this.getPti().getContainerId() - .equals(otherContInfo.getPti().getContainerId()); - } - return equal; - } - - @Override - public int hashCode() { - return this.getPti().getContainerId().hashCode(); - } - } -} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/package-info.java deleted file mode 100644 index 0250807..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * This package contains classes related to the queuing of containers at - * the NM. - * - */ -package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java new file mode 100644 index 0000000..9665e75 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import org.apache.hadoop.yarn.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An implementation of the {@link ResourceUtilizationTracker} that equates + * resource utilization with the total resources allocated to running + * containers. + */ +public class AllocationBasedResourceUtilizationTracker implements + ResourceUtilizationTracker { + + private static final Logger LOG = + LoggerFactory.getLogger(AllocationBasedResourceUtilizationTracker.class); + + private ResourceUtilization containersAllocation; + private ContainerScheduler scheduler; + + AllocationBasedResourceUtilizationTracker(ContainerScheduler scheduler) { + this.containersAllocation = ResourceUtilization.newInstance(0, 0, 0.0f); + this.scheduler = scheduler; + } + + /** + * Get the aggregate resource allocation of all currently running + * containers. + * @return ResourceUtilization Resource Utilization. + */ + @Override + public ResourceUtilization getCurrentUtilization() { + return this.containersAllocation; + } + + /** + * Add Container's resources to the accumulated Utilization. + * @param container Container. + */ + @Override + public void addContainerResources(Container container) { + ContainersMonitor.ContainerManagerUtils.increaseResourceUtilization( + getContainersMonitor(), this.containersAllocation, + container.getResource()); + } + + /** + * Subtract Container's resources from the accumulated Utilization. + * @param container Container. + */ + @Override + public void subtractContainerResource(Container container) { + ContainersMonitor.ContainerManagerUtils.decreaseResourceUtilization( + getContainersMonitor(), this.containersAllocation, + container.getResource()); + } + + /** + * Check if the NM currently has enough resources available to run the + * container. + * @param container Container. + * @return true, if the NM currently has enough resources available to + * run the container. + */ + @Override + public boolean hasResourcesAvailable(Container container) { + long pMemBytes = container.getResource().getMemorySize() * 1024 * 1024L; + return hasResourcesAvailable(pMemBytes, + (long) (getContainersMonitor().getVmemRatio() * pMemBytes), + container.getResource().getVirtualCores()); + } + + private boolean hasResourcesAvailable(long pMemBytes, long vMemBytes, + int cpuVcores) { + // Check physical memory. + if (LOG.isDebugEnabled()) { + LOG.debug("pMemCheck [current={} + asked={} > allowed={}]", + this.containersAllocation.getPhysicalMemory(), + (pMemBytes >> 20), + (getContainersMonitor().getPmemAllocatedForContainers() >> 20)); + } + if (this.containersAllocation.getPhysicalMemory() + + (int) (pMemBytes >> 20) > + (int) (getContainersMonitor() + .getPmemAllocatedForContainers() >> 20)) { + return false; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("before vMemCheck " + + "[isEnabled={}, current={} + asked={} > allowed={}]", + getContainersMonitor().isVmemCheckEnabled(), + this.containersAllocation.getVirtualMemory(), (vMemBytes >> 20), + (getContainersMonitor().getVmemAllocatedForContainers() >> 20)); + } + // Check virtual memory.
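+ // Editor's note (not part of the original patch): a worked example of + // these checks. With yarn.nodemanager.vmem-pmem-ratio at its 2.1 default, + // a container asking for 2048 MB gives pMemBytes = 2048 MB and vMemBytes + // = (long) (2.1 * pMemBytes), roughly 4300 MB. The pmem check above fails + // once current allocation + 2048 MB exceeds the NM's pmem budget; the + // vmem check below compares against the NM's vmem budget the same way, + // and is skipped entirely when the vmem check is disabled.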
+ if (getContainersMonitor().isVmemCheckEnabled() && + this.containersAllocation.getVirtualMemory() + + (int) (vMemBytes >> 20) > + (int) (getContainersMonitor() + .getVmemAllocatedForContainers() >> 20)) { + return false; + } + + float vCores = (float) cpuVcores / + getContainersMonitor().getVCoresAllocatedForContainers(); + if (LOG.isDebugEnabled()) { + LOG.debug("before cpuCheck [asked={} > allowed={}]", + this.containersAllocation.getCPU(), vCores); + } + // Check CPU. + if (this.containersAllocation.getCPU() + vCores > 1.0f) { + return false; + } + return true; + } + + public ContainersMonitor getContainersMonitor() { + return this.scheduler.getContainersMonitor(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java new file mode 100644 index 0000000..5c96d55 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java @@ -0,0 +1,418 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * The ContainerScheduler manages a collection of runnable containers. It + * ensures that a container is launched only if all its launch criteria are + * met. It also ensures that OPPORTUNISTIC containers are killed to make + * room for GUARANTEED containers. + */ +public class ContainerScheduler extends AbstractService implements + EventHandler<ContainerSchedulerEvent> { + + private static final Logger LOG = + LoggerFactory.getLogger(ContainerScheduler.class); + + private final Context context; + private final int maxOppQueueLength; + + // Queue of Guaranteed Containers waiting for resources to run + private final LinkedHashMap<ContainerId, Container> + queuedGuaranteedContainers = new LinkedHashMap<>(); + // Queue of Opportunistic Containers waiting for resources to run + private final LinkedHashMap<ContainerId, Container> + queuedOpportunisticContainers = new LinkedHashMap<>(); + + // Used to keep track of containers that have been marked to be killed + // to make room for a guaranteed container. + private final Map<ContainerId, Container> oppContainersToKill = + new HashMap<>(); + + // Containers launched by the Scheduler will take a while to actually + // move to the RUNNING state, but should still be fair game for killing + // by the scheduler to make room for guaranteed containers. This holds + // containers that are in RUNNING as well as those in SCHEDULED state that + // have been marked to run, but not yet RUNNING. + private final LinkedHashMap<ContainerId, Container> runningContainers = + new LinkedHashMap<>(); + + private final ContainerQueuingLimit queuingLimit = + ContainerQueuingLimit.newInstance(); + + private final OpportunisticContainersStatus opportunisticContainersStatus; + + // Resource Utilization Tracker that decides how utilization of the node + // increases / decreases based on container start / finish + private ResourceUtilizationTracker utilizationTracker; + + private final AsyncDispatcher dispatcher; + private final NodeManagerMetrics metrics; + + /** + * Instantiate a Container Scheduler. + * @param context NodeManager Context. + * @param dispatcher AsyncDispatcher. + * @param metrics NodeManagerMetrics.
+ */ + public ContainerScheduler(Context context, AsyncDispatcher dispatcher, + NodeManagerMetrics metrics) { + this(context, dispatcher, metrics, context.getConf().getInt( + YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, + YarnConfiguration. + DEFAULT_NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH)); + } + + @VisibleForTesting + public ContainerScheduler(Context context, AsyncDispatcher dispatcher, + NodeManagerMetrics metrics, int qLength) { + super(ContainerScheduler.class.getName()); + this.context = context; + this.dispatcher = dispatcher; + this.metrics = metrics; + this.maxOppQueueLength = (qLength <= 0) ? 0 : qLength; + this.utilizationTracker = + new AllocationBasedResourceUtilizationTracker(this); + this.opportunisticContainersStatus = + OpportunisticContainersStatus.newInstance(); + } + + /** + * Handle ContainerSchedulerEvents. + * @param event ContainerSchedulerEvent. + */ + @Override + public void handle(ContainerSchedulerEvent event) { + switch (event.getType()) { + case SCHEDULE_CONTAINER: + scheduleContainer(event.getContainer()); + break; + case CONTAINER_COMPLETED: + onContainerCompleted(event.getContainer()); + break; + case SHED_QUEUED_CONTAINERS: + shedQueuedOpportunisticContainers(); + break; + default: + LOG.error("Unknown event arrived at ContainerScheduler: " + + event.toString()); + } + } + + /** + * Return number of queued containers. + * @return Number of queued containers. + */ + public int getNumQueuedContainers() { + return this.queuedGuaranteedContainers.size() + + this.queuedOpportunisticContainers.size(); + } + + @VisibleForTesting + public int getNumQueuedGuaranteedContainers() { + return this.queuedGuaranteedContainers.size(); + } + + @VisibleForTesting + public int getNumQueuedOpportunisticContainers() { + return this.queuedOpportunisticContainers.size(); + } + + public OpportunisticContainersStatus getOpportunisticContainersStatus() { + this.opportunisticContainersStatus.setQueuedOpportContainers( + getNumQueuedOpportunisticContainers()); + this.opportunisticContainersStatus.setWaitQueueLength( + getNumQueuedContainers()); + this.opportunisticContainersStatus.setOpportMemoryUsed( + metrics.getAllocatedOpportunisticGB()); + this.opportunisticContainersStatus.setOpportCoresUsed( + metrics.getAllocatedOpportunisticVCores()); + this.opportunisticContainersStatus.setRunningOpportContainers( + metrics.getRunningOpportunisticContainers()); + return this.opportunisticContainersStatus; + } + + private void onContainerCompleted(Container container) { + oppContainersToKill.remove(container.getContainerId()); + + // This could be killed externally for eg. by the ContainerManager, + // in which case, the container might still be queued. + Container queued = + queuedOpportunisticContainers.remove(container.getContainerId()); + if (queued == null) { + queuedGuaranteedContainers.remove(container.getContainerId()); + } + + // decrement only if it was a running container + Container completedContainer = runningContainers.remove(container + .getContainerId()); + if (completedContainer != null) { + this.utilizationTracker.subtractContainerResource(container); + if (container.getContainerTokenIdentifier().getExecutionType() == + ExecutionType.OPPORTUNISTIC) { + this.metrics.completeOpportunisticContainer(container.getResource()); + } + startPendingContainers(); + } + } + + private void startPendingContainers() { + // Start pending guaranteed containers, if resources available. 
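+ // Editor's note: the ordering below is deliberate. The guaranteed queue + // is drained first, and the opportunistic queue is attempted only when + // startContainersFromQueue() walked the entire guaranteed queue without + // running out of resources (i.e. resourcesAvailable stayed true).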
+ boolean resourcesAvailable = + startContainersFromQueue(queuedGuaranteedContainers.values()); + // Start opportunistic containers, if resources available. + if (resourcesAvailable) { + startContainersFromQueue(queuedOpportunisticContainers.values()); + } + } + + private boolean startContainersFromQueue( + Collection<Container> queuedContainers) { + Iterator<Container> cIter = queuedContainers.iterator(); + boolean resourcesAvailable = true; + while (cIter.hasNext() && resourcesAvailable) { + Container container = cIter.next(); + if (this.utilizationTracker.hasResourcesAvailable(container)) { + startAllocatedContainer(container); + cIter.remove(); + } else { + resourcesAvailable = false; + } + } + return resourcesAvailable; + } + + @VisibleForTesting + protected void scheduleContainer(Container container) { + if (maxOppQueueLength <= 0) { + startAllocatedContainer(container); + return; + } + if (queuedGuaranteedContainers.isEmpty() && + queuedOpportunisticContainers.isEmpty() && + this.utilizationTracker.hasResourcesAvailable(container)) { + startAllocatedContainer(container); + } else { + LOG.info("No available resources for container {} to start its execution " + + "immediately.", container.getContainerId()); + boolean isQueued = true; + if (container.getContainerTokenIdentifier().getExecutionType() == + ExecutionType.GUARANTEED) { + queuedGuaranteedContainers.put(container.getContainerId(), container); + // Kill running opportunistic containers to make space for + // guaranteed container. + killOpportunisticContainers(container); + } else { + if (queuedOpportunisticContainers.size() <= maxOppQueueLength) { + LOG.info("Opportunistic container {} will be queued at the NM.", + container.getContainerId()); + queuedOpportunisticContainers.put( + container.getContainerId(), container); + } else { + isQueued = false; + LOG.info("Opportunistic container [{}] will not be queued at the NM " + + "since max queue length [{}] has been reached.", + container.getContainerId(), maxOppQueueLength); + container.sendKillEvent( + ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, + "Opportunistic container queue is full."); + } + } +// if (isQueued) { +// try { +// this.context.getNMStateStore().storeContainerQueued( +// container.getContainerId()); +// } catch (IOException e) { +// LOG.warn("Could not store container [" + container.getContainerId() +// + "] state. The Container has been queued.", e); +// } +// } + } + } + + private void killOpportunisticContainers(Container container) { + List<Container> extraOpportContainersToKill = + pickOpportunisticContainersToKill(container.getContainerId()); + // Kill the opportunistic containers that were chosen.
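+ // Editor's note: sendKillEvent() below is asynchronous; it only + // dispatches a kill. The freed resources are reclaimed later, when each + // victim's CONTAINER_COMPLETED event reaches onContainerCompleted(), + // which subtracts its allocation and calls startPendingContainers() so + // the queued guaranteed container can actually launch.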
+ for (Container contToKill : extraOpportContainersToKill) { + contToKill.sendKillEvent( + ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, + "Container Killed to make room for Guaranteed Container."); + oppContainersToKill.put(contToKill.getContainerId(), contToKill); + LOG.info( + "Opportunistic container {} will be killed in order to start the " + + "execution of guaranteed container {}.", + contToKill.getContainerId(), container.getContainerId()); + } + } + + private void startAllocatedContainer(Container container) { + LOG.info("Starting container [" + container.getContainerId() + "]"); + runningContainers.put(container.getContainerId(), container); + this.utilizationTracker.addContainerResources(container); + if (container.getContainerTokenIdentifier().getExecutionType() == + ExecutionType.OPPORTUNISTIC) { + this.metrics.startOpportunisticContainer(container.getResource()); + } + container.sendLaunchEvent(); + } + + private List<Container> pickOpportunisticContainersToKill( + ContainerId containerToStartId) { + // The opportunistic containers that need to be killed for the + // given container to start. + List<Container> extraOpportContainersToKill = new ArrayList<>(); + // Track resources that need to be freed. + ResourceUtilization resourcesToFreeUp = resourcesToFreeUp( + containerToStartId); + + // Go over the running opportunistic containers. + // Use a descending iterator to kill more recently started containers. + Iterator<Container> lifoIterator = new LinkedList<>( + runningContainers.values()).descendingIterator(); + while (lifoIterator.hasNext() && + !hasSufficientResources(resourcesToFreeUp)) { + Container runningCont = lifoIterator.next(); + if (runningCont.getContainerTokenIdentifier().getExecutionType() == + ExecutionType.OPPORTUNISTIC) { + + if (oppContainersToKill.containsKey( + runningCont.getContainerId())) { + // These containers have already been marked to be killed. + // So exclude them. + continue; + } + extraOpportContainersToKill.add(runningCont); + ContainersMonitor.ContainerManagerUtils.decreaseResourceUtilization( + getContainersMonitor(), resourcesToFreeUp, + runningCont.getResource()); + } + } + if (!hasSufficientResources(resourcesToFreeUp)) { + LOG.warn("There are insufficient resources to start guaranteed [{}] " + + "at the moment. Opportunistic containers are in the process of " + + "being killed to make room.", containerToStartId); + } + return extraOpportContainersToKill; + } + + private boolean hasSufficientResources( + ResourceUtilization resourcesToFreeUp) { + return resourcesToFreeUp.getPhysicalMemory() <= 0 && + resourcesToFreeUp.getVirtualMemory() <= 0 && + resourcesToFreeUp.getCPU() <= 0.0f; + } + + private ResourceUtilization resourcesToFreeUp( + ContainerId containerToStartId) { + // Get allocation of currently allocated containers. + ResourceUtilization resourceAllocationToFreeUp = ResourceUtilization + .newInstance(this.utilizationTracker.getCurrentUtilization()); + + // Add to the allocation the allocation of the pending guaranteed + // containers that will start before the current container is started. + for (Container container : queuedGuaranteedContainers.values()) { + ContainersMonitor.ContainerManagerUtils.increaseResourceUtilization( + getContainersMonitor(), resourceAllocationToFreeUp, + container.getResource()); + if (container.getContainerId().equals(containerToStartId)) { + break; + } + } + + // These resources are being freed, likely at the behest of another + // guaranteed container.
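+ // Editor's note: subtracting the already-marked victims here avoids + // double-counting, so two guaranteed containers arriving back to back + // do not both select the same opportunistic containers that are already + // being killed.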
+ for (Container container : oppContainersToKill.values()) { + ContainersMonitor.ContainerManagerUtils.decreaseResourceUtilization( + getContainersMonitor(), resourceAllocationToFreeUp, + container.getResource()); + } + + // Subtract the overall node resources. + getContainersMonitor().subtractNodeResourcesFromResourceUtilization( + resourceAllocationToFreeUp); + return resourceAllocationToFreeUp; + } + + @SuppressWarnings("unchecked") + public void updateQueuingLimit(ContainerQueuingLimit limit) { + this.queuingLimit.setMaxQueueLength(limit.getMaxQueueLength()); + // YARN-2886 should add support for wait-times. Include wait time as + // well once it is implemented + if ((queuingLimit.getMaxQueueLength() > -1) && + (queuingLimit.getMaxQueueLength() < + queuedOpportunisticContainers.size())) { + dispatcher.getEventHandler().handle( + new ContainerSchedulerEvent(null, + ContainerSchedulerEventType.SHED_QUEUED_CONTAINERS)); + } + } + + private void shedQueuedOpportunisticContainers() { + int numAllowed = this.queuingLimit.getMaxQueueLength(); + Iterator containerIter = + queuedOpportunisticContainers.values().iterator(); + while (containerIter.hasNext()) { + Container container = containerIter.next(); + if (numAllowed <= 0) { + container.sendKillEvent( + ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, + "Container De-queued to meet NM queuing limits."); + containerIter.remove(); + LOG.info( + "Opportunistic container {} will be killed to meet NM queuing" + + " limits.", container.getContainerId()); + } + numAllowed--; + } + } + + public ContainersMonitor getContainersMonitor() { + return this.context.getContainerManager().getContainersMonitor(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEvent.java new file mode 100644 index 0000000..460aaeb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEvent.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import org.apache.hadoop.yarn.event.AbstractEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container + .Container; + +/** + * Events consumed by the {@link ContainerScheduler}. 
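+ * (Editor's note: the event types are enumerated in + * {@link ContainerSchedulerEventType}: SCHEDULE_CONTAINER, + * CONTAINER_COMPLETED and SHED_QUEUED_CONTAINERS.)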
+ */ +public class ContainerSchedulerEvent extends + AbstractEvent { + + private final Container container; + + /** + * Create instance of Event. + * @param container Container. + * @param eventType EventType. + */ + public ContainerSchedulerEvent(Container container, + ContainerSchedulerEventType eventType) { + super(eventType); + this.container = container; + } + + /** + * Get the container associated with the event. + * @return Container. + */ + public Container getContainer() { + return container; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java similarity index 50% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java index fb567d5..086cb9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/QueuedContainersStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java @@ -16,30 +16,14 @@ * limitations under the License. */ -package org.apache.hadoop.yarn.server.api.records; - -import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Evolving; -import org.apache.hadoop.yarn.util.Records; +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; /** - *

- * QueuedContainersStatus captures information pertaining to the - * state of execution of the Queueable containers within a node. - *

+ * Event types associated with {@link ContainerSchedulerEvent}. */ -@Private -@Evolving -public abstract class QueuedContainersStatus { - public static QueuedContainersStatus newInstance() { - return Records.newRecord(QueuedContainersStatus.class); - } - - public abstract int getEstimatedQueueWaitTime(); - - public abstract void setEstimatedQueueWaitTime(int queueWaitTime); - - public abstract int getWaitQueueLength(); - - public abstract void setWaitQueueLength(int waitQueueLength); +public enum ContainerSchedulerEventType { + SCHEDULE_CONTAINER, + CONTAINER_COMPLETED, + // Producer: Node HB response - RM has asked to shed the queue + SHED_QUEUED_CONTAINERS, } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ResourceUtilizationTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ResourceUtilizationTracker.java new file mode 100644 index 0000000..3c17eca --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ResourceUtilizationTracker.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import org.apache.hadoop.yarn.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; + +/** + * This interface abstracts out how a container contributes to + * Resource Utilization of the node. + * It is used by the {@link ContainerScheduler} to determine which + * OPPORTUNISTIC containers should be killed to make room for a GUARANTEED + * container. + */ +public interface ResourceUtilizationTracker { + + /** + * Get the current total utilization of all the Containers running on + * the node. + * @return ResourceUtilization Resource Utilization. + */ + ResourceUtilization getCurrentUtilization(); + + /** + * Add Container's resources to Node Utilization. + * @param container Container. + */ + void addContainerResources(Container container); + + /** + * Subtract Container's resources from Node Utilization. + * @param container Container. + */ + void subtractContainerResource(Container container); + + /** + * Check if the NM currently has enough resources available to run the + * container. + * @param container Container. + * @return true, if the NM currently has enough resources available to + * run the container.
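+ * (Editor's note: this patch ships a single implementation, + * {@link AllocationBasedResourceUtilizationTracker}, which answers this + * purely from the resources allocated to containers; a tracker based on + * sampled actual utilization would also fit behind this interface.)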
+ */ + boolean hasResourcesAvailable(Container container); + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/package-info.java new file mode 100644 index 0000000..4641ac0 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Container Scheduler + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 6105eff..291b488 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -23,6 +23,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.yarn.api.records.Resource; @@ -60,12 +61,21 @@ MutableGaugeInt goodLocalDirsDiskUtilizationPerc; @Metric("Disk utilization % on good log dirs") MutableGaugeInt goodLogDirsDiskUtilizationPerc; + + @Metric("Current allocated memory by opportunistic containers in GB") + MutableGaugeLong allocatedOpportunisticGB; + @Metric("Current allocated Virtual Cores by opportunistic containers") + MutableGaugeInt allocatedOpportunisticVCores; + @Metric("# of running opportunistic containers") + MutableGaugeInt runningOpportunisticContainers; + // CHECKSTYLE:ON:VisibilityModifier private JvmMetrics jvmMetrics = null; private long allocatedMB; private long availableMB; + private long allocatedOpportunisticMB; public NodeManagerMetrics(JvmMetrics jvmMetrics) { this.jvmMetrics = jvmMetrics; @@ -161,6 +171,22 
@@ public void changeContainer(Resource before, Resource now) { availableVCores.decr(deltaVCores); } + public void startOpportunisticContainer(Resource res) { + runningOpportunisticContainers.incr(); + allocatedOpportunisticMB = allocatedOpportunisticMB + res.getMemorySize(); + allocatedOpportunisticGB + .set((int) Math.ceil(allocatedOpportunisticMB / 1024d)); + allocatedOpportunisticVCores.incr(res.getVirtualCores()); + } + + public void completeOpportunisticContainer(Resource res) { + runningOpportunisticContainers.decr(); + allocatedOpportunisticMB = allocatedOpportunisticMB - res.getMemorySize(); + allocatedOpportunisticGB + .set((int) Math.ceil(allocatedOpportunisticMB / 1024d)); + allocatedOpportunisticVCores.decr(res.getVirtualCores()); + } + public void addResource(Resource res) { availableMB = availableMB + res.getMemorySize(); availableGB.incr((int)Math.floor(availableMB/1024d)); @@ -237,4 +263,16 @@ public int getReInitializingContainer() { public int getContainersRolledbackOnFailure() { return containersRolledBackOnFailure.value(); } + + public long getAllocatedOpportunisticGB() { + return allocatedOpportunisticGB.value(); + } + + public int getAllocatedOpportunisticVCores() { + return allocatedOpportunisticVCores.value(); + } + + public int getRunningOpportunisticContainers() { + return runningOpportunisticContainers.value(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/DistributedScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/DistributedScheduler.java index 368858c..a9b5ed4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/DistributedScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/DistributedScheduler.java @@ -21,6 +21,7 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateRequest; @@ -32,11 +33,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerStatus; -import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyApplicationContext; import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AbstractRequestInterceptor; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; @@ -48,7 +48,9 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; 
+import java.util.Map; /** *

The DistributedScheduler runs on the NodeManager and is modeled as an @@ -74,6 +76,9 @@ private OpportunisticContainerContext oppContainerContext = new OpportunisticContainerContext(); + // Mapping of NodeId to NodeTokens. Populated either from RM response or + // generated locally if required. + private Map nodeTokens = new HashMap<>(); private ApplicationAttemptId applicationAttemptId; private OpportunisticContainerAllocator containerAllocator; private NMTokenSecretManagerInNM nmSecretManager; @@ -157,17 +162,17 @@ public AllocateResponse allocate(AllocateRequest request) throws } /** - * Check if we already have a NMToken. if Not, generate the Token and - * add it to the response + * Adds all the newly allocated Containers to the allocate Response. + * Additionally, in case the NMToken for one of the nodes does not exist, it + * generates one and adds it to the response. */ - private void updateResponseWithNMTokens(AllocateResponse response, + private void updateAllocateResponse(AllocateResponse response, List nmTokens, List allocatedContainers) { List newTokens = new ArrayList<>(); if (allocatedContainers.size() > 0) { response.getAllocatedContainers().addAll(allocatedContainers); for (Container alloc : allocatedContainers) { - if (!oppContainerContext.getNodeTokens().containsKey( - alloc.getNodeId())) { + if (!nodeTokens.containsKey(alloc.getNodeId())) { newTokens.add(nmSecretManager.generateNMToken(appSubmitter, alloc)); } } @@ -179,17 +184,14 @@ private void updateResponseWithNMTokens(AllocateResponse response, private void updateParameters( RegisterDistributedSchedulingAMResponse registerResponse) { - oppContainerContext.getAppParams().setMinResource( - registerResponse.getMinContainerResource()); - oppContainerContext.getAppParams().setMaxResource( - registerResponse.getMaxContainerResource()); - oppContainerContext.getAppParams().setIncrementResource( - registerResponse.getIncrContainerResource()); - if (oppContainerContext.getAppParams().getIncrementResource() == null) { - oppContainerContext.getAppParams().setIncrementResource( - oppContainerContext.getAppParams().getMinResource()); + Resource incrementResource = registerResponse.getIncrContainerResource(); + if (incrementResource == null) { + incrementResource = registerResponse.getMinContainerResource(); } - oppContainerContext.getAppParams().setContainerTokenExpiryInterval( + oppContainerContext.updateAllocationParams( + registerResponse.getMinContainerResource(), + registerResponse.getMaxContainerResource(), + incrementResource, registerResponse.getContainerTokenExpiryInterval()); oppContainerContext.getContainerIdGenerator() @@ -197,15 +199,8 @@ private void updateParameters( setNodeList(registerResponse.getNodesForScheduling()); } - private void setNodeList(List nodeList) { - oppContainerContext.getNodeMap().clear(); - addToNodeList(nodeList); - } - - private void addToNodeList(List nodes) { - for (NodeId n : nodes) { - oppContainerContext.getNodeMap().put(n.getHost(), n); - } + private void setNodeList(List nodeList) { + oppContainerContext.updateNodeList(nodeList); } @Override @@ -225,16 +220,27 @@ private void addToNodeList(List nodes) { public DistributedSchedulingAllocateResponse allocateForDistributedScheduling( DistributedSchedulingAllocateRequest request) throws YarnException, IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Forwarding allocate request to the" + - "Distributed Scheduler Service on YARN RM"); - } + + // Partition requests to GUARANTEED and OPPORTUNISTIC. 
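+ // Editor's sketch of the flow, with hypothetical numbers: an AM ask of, + // say, 10 OPPORTUNISTIC and 5 GUARANTEED requests is split by + // partitionAskList(); the 10 are allocated locally against the known + // node list, while the ask list forwarded to the RM below is pruned + // down to the 5 GUARANTEED requests.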
+ OpportunisticContainerAllocator.PartitionedResourceRequests + partitionedAsks = containerAllocator + .partitionAskList(request.getAllocateRequest().getAskList()); + + // Allocate OPPORTUNISTIC containers. List<Container> allocatedContainers = containerAllocator.allocateContainers( - request.getAllocateRequest(), applicationAttemptId, + request.getAllocateRequest().getResourceBlacklistRequest(), + partitionedAsks.getOpportunistic(), applicationAttemptId, oppContainerContext, rmIdentifier, appSubmitter); + // Prepare request for sending to RM for scheduling GUARANTEED containers. request.setAllocatedContainers(allocatedContainers); + request.getAllocateRequest().setAskList(partitionedAsks.getGuaranteed()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Forwarding allocate request to the " + + "Distributed Scheduler Service on YARN RM"); + } DistributedSchedulingAllocateResponse dsResp = getNextInterceptor().allocateForDistributedScheduling(request); @@ -243,30 +249,14 @@ public DistributedSchedulingAllocateResponse allocateForDistributedScheduling( setNodeList(dsResp.getNodesForScheduling()); List<NMToken> nmTokens = dsResp.getAllocateResponse().getNMTokens(); for (NMToken nmToken : nmTokens) { - oppContainerContext.getNodeTokens().put(nmToken.getNodeId(), nmToken); - } - - List<ContainerStatus> completedContainers = - dsResp.getAllocateResponse().getCompletedContainersStatuses(); - - // Only account for opportunistic containers - for (ContainerStatus cs : completedContainers) { - if (cs.getExecutionType() == ExecutionType.OPPORTUNISTIC) { - oppContainerContext.getContainersAllocated() - .remove(cs.getContainerId()); - } + nodeTokens.put(nmToken.getNodeId(), nmToken); } // Check if we have NM tokens for all the allocated containers. If not // generate one and update the response. - updateResponseWithNMTokens( + updateAllocateResponse( dsResp.getAllocateResponse(), nmTokens, allocatedContainers); - if (LOG.isDebugEnabled()) { - LOG.debug("Number of opportunistic containers currently" + - "allocated by application: " + oppContainerContext - .getContainersAllocated().size()); - } return dsResp; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java index 35e7593..e1a9995 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java @@ -21,7 +21,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; @@ -38,7 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; -import org.apache.hadoop.yarn.util.ConverterUtils; + import org.apache.hadoop.yarn.webapp.NotFoundException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -149,7 +148,7 @@ private static void checkAccess(String remoteUser, Application
application, private static void checkState(ContainerState state) { if (state == ContainerState.NEW || state == ContainerState.LOCALIZING || - state == ContainerState.LOCALIZED) { + state == ContainerState.SCHEDULED) { throw new NotFoundException("Container is not yet running. Current state is " + state); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java index a1e0bc7..4beccc9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java @@ -86,6 +86,7 @@ protected void render(Block html) { ._("User", info.getUser()) ._("TotalMemoryNeeded", info.getMemoryNeeded()) ._("TotalVCoresNeeded", info.getVCoresNeeded()) + ._("ExecutionType", info.getExecutionType()) ._("logs", info.getShortLogLink(), "Link to logs"); html._(InfoBlock.class); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java index 10a8156..26d3f02 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java @@ -46,6 +46,7 @@ protected String user; protected long totalMemoryNeededMB; protected long totalVCoresNeeded; + private String executionType; protected String containerLogsLink; protected String nodeId; @XmlTransient @@ -84,6 +85,8 @@ public ContainerInfo(final Context nmContext, final Container container, this.totalMemoryNeededMB = res.getMemorySize(); this.totalVCoresNeeded = res.getVirtualCores(); } + this.executionType = + container.getContainerTokenIdentifier().getExecutionType().name(); this.containerLogsShortLink = ujoin("containerlogs", this.id, container.getUser()); @@ -143,6 +146,10 @@ public long getVCoresNeeded() { return this.totalVCoresNeeded; } + public String getExecutionType() { + return this.executionType; + } + public List getContainerLogFiles() { return this.containerLogFiles; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index 3b84a78..8e4522b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ 
-21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.hadoop.fs.FileContext; @@ -158,7 +159,7 @@ public long getRMIdentifier() { containerManager.startContainers(allRequests); BaseContainerManagerTest.waitForContainerState(containerManager, cID, - ContainerState.RUNNING); + Arrays.asList(ContainerState.RUNNING, ContainerState.SCHEDULED), 20); List<ContainerId> containerIds = new ArrayList<ContainerId>(); containerIds.add(cID); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index f6593f9..04cfae9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -454,6 +454,16 @@ protected void rebootNodeStatusUpdaterAndRegisterWithRM() { if (containersShouldBePreserved) { Assert.assertFalse(containers.isEmpty()); Assert.assertTrue(containers.containsKey(existingCid)); + ContainerState state = containers.get(existingCid) + .cloneAndGetContainerStatus().getState(); + // Wait till RUNNING state, re-reading the state each iteration. + int counter = 50; + while (state != ContainerState.RUNNING && counter > 0) { + Thread.sleep(100); + state = containers.get(existingCid) + .cloneAndGetContainerStatus().getState(); + counter--; + } Assert.assertEquals(ContainerState.RUNNING, containers.get(existingCid) .cloneAndGetContainerStatus().getState()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index b3ad318..03e06d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -28,6 +28,7 @@ import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.Arrays; +import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -255,7 +256,9 @@ public ContainerManagementProtocol run() { GetContainerStatusesRequest.newInstance(containerIds); ContainerStatus containerStatus = containerManager.getContainerStatuses(request).getContainerStatuses().get(0); - Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState()); + Assert.assertTrue( + EnumSet.of(ContainerState.RUNNING, ContainerState.SCHEDULED) + .contains(containerStatus.getState())); } public static ContainerId createContainerId() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index d76aa35..c679b92 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -65,7 +65,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -1080,128 +1079,6 @@ public ContainerState getCurrentState() { Assert.assertTrue(containerIdSet.contains(runningContainerId)); } - @Test(timeout = 90000) - public void testKilledQueuedContainers() throws Exception { - NodeManager nm = new NodeManager(); - YarnConfiguration conf = new YarnConfiguration(); - conf.set( - NodeStatusUpdaterImpl - .YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, - "10000"); - nm.init(conf); - NodeStatusUpdaterImpl nodeStatusUpdater = - (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater(); - ApplicationId appId = ApplicationId.newInstance(0, 0); - ApplicationAttemptId appAttemptId = - ApplicationAttemptId.newInstance(appId, 0); - - // Add application to context. - nm.getNMContext().getApplications().putIfAbsent(appId, - mock(Application.class)); - - // Create a running container and add it to the context. - ContainerId runningContainerId = - ContainerId.newContainerId(appAttemptId, 1); - Token runningContainerToken = - BuilderUtils.newContainerToken(runningContainerId, 0, "anyHost", - 1234, "anyUser", BuilderUtils.newResource(1024, 1), 0, 123, - "password".getBytes(), 0); - Container runningContainer = - new ContainerImpl(conf, null, null, null, null, - BuilderUtils.newContainerTokenIdentifier(runningContainerToken), - nm.getNMContext()) { - @Override - public ContainerState getCurrentState() { - return ContainerState.RUNNING; - } - - @Override - public org.apache.hadoop.yarn.server.nodemanager.containermanager. - container.ContainerState getContainerState() { - return org.apache.hadoop.yarn.server.nodemanager.containermanager. - container.ContainerState.RUNNING; - } - }; - - nm.getNMContext().getContainers() - .put(runningContainerId, runningContainer); - - // Create two killed queued containers and add them to the queuing context. 
- ContainerId killedQueuedContainerId1 = ContainerId.newContainerId( - appAttemptId, 2); - ContainerTokenIdentifier killedQueuedContainerTokenId1 = BuilderUtils - .newContainerTokenIdentifier(BuilderUtils.newContainerToken( - killedQueuedContainerId1, 0, "anyHost", 1234, "anyUser", - BuilderUtils.newResource(1024, 1), 0, 123, - "password".getBytes(), 0)); - ContainerId killedQueuedContainerId2 = ContainerId.newContainerId( - appAttemptId, 3); - ContainerTokenIdentifier killedQueuedContainerTokenId2 = BuilderUtils - .newContainerTokenIdentifier(BuilderUtils.newContainerToken( - killedQueuedContainerId2, 0, "anyHost", 1234, "anyUser", - BuilderUtils.newResource(1024, 1), 0, 123, - "password".getBytes(), 0)); - - nm.getNMContext().getQueuingContext().getKilledQueuedContainers().put( - killedQueuedContainerTokenId1, "Queued container killed."); - nm.getNMContext().getQueuingContext().getKilledQueuedContainers().put( - killedQueuedContainerTokenId2, "Queued container killed."); - - List containerStatuses = nodeStatusUpdater - .getContainerStatuses(); - - Assert.assertEquals(3, containerStatuses.size()); - - ContainerStatus runningContainerStatus = null; - ContainerStatus killedQueuedContainerStatus1 = null; - ContainerStatus killedQueuedContainerStatus2 = null; - for (ContainerStatus cStatus : containerStatuses) { - if (ContainerState.RUNNING == cStatus.getState()) { - runningContainerStatus = cStatus; - } - if (ContainerState.COMPLETE == cStatus.getState()) { - if (killedQueuedContainerId1.equals(cStatus.getContainerId())) { - killedQueuedContainerStatus1 = cStatus; - } else { - killedQueuedContainerStatus2 = cStatus; - } - } - } - - // Check container IDs and Container Status. - Assert.assertNotNull(runningContainerId); - Assert.assertNotNull(killedQueuedContainerId1); - Assert.assertNotNull(killedQueuedContainerId2); - - // Killed queued container should have ABORTED exit status. - Assert.assertEquals(ContainerExitStatus.ABORTED, - killedQueuedContainerStatus1.getExitStatus()); - Assert.assertEquals(ContainerExitStatus.ABORTED, - killedQueuedContainerStatus2.getExitStatus()); - - // Killed queued container should appear in the recentlyStoppedContainers. - Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped( - killedQueuedContainerId1)); - Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped( - killedQueuedContainerId2)); - - // Check if killed queued containers are successfully removed from the - // queuing context. - List ackedContainers = new ArrayList(); - ackedContainers.add(killedQueuedContainerId1); - ackedContainers.add(killedQueuedContainerId2); - - nodeStatusUpdater.removeOrTrackCompletedContainersFromContext( - ackedContainers); - - containerStatuses = nodeStatusUpdater.getContainerStatuses(); - - // Only the running container should be in the container statuses now. 
- Assert.assertEquals(1, containerStatuses.size()); - Assert.assertEquals(ContainerState.RUNNING, - containerStatuses.get(0).getState()); - } - @Test(timeout = 10000) public void testCompletedContainersIsRecentlyStopped() throws Exception { NodeManager nm = new NodeManager(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 579bea9..4ec5069 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; + import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -691,11 +692,6 @@ public NodeStatusUpdater getNodeStatusUpdater() { return null; } - @Override - public QueuingContext getQueuingContext() { - return null; - } - public boolean isDistributedSchedulingEnabled() { return false; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index cb7815e..e009661 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; @@ -192,10 +193,10 @@ public void setup() throws IOException { conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); // Default delSrvc + exec = createContainerExecutor(); delSrvc = createDeletionService(); delSrvc.init(conf); - exec = createContainerExecutor(); dirsHandler = new LocalDirsHandlerService(); nodeHealthChecker = new NodeHealthCheckerService( NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); @@ -284,38 +285,52 @@ public void tearDown() throws IOException, InterruptedException { .build()); } - public static void waitForContainerState(ContainerManagementProtocol containerManager, - ContainerId containerID, ContainerState finalState) + public static void waitForContainerState( + ContainerManagementProtocol containerManager, ContainerId containerID, + ContainerState 
finalState) + throws InterruptedException, YarnException, IOException { + waitForContainerState(containerManager, containerID, + Arrays.asList(finalState), 20); + } + + public static void waitForContainerState( + ContainerManagementProtocol containerManager, ContainerId containerID, + ContainerState finalState, int timeOutMax) throws InterruptedException, YarnException, IOException { - waitForContainerState(containerManager, containerID, finalState, 20); + waitForContainerState(containerManager, containerID, + Arrays.asList(finalState), timeOutMax); } - public static void waitForContainerState(ContainerManagementProtocol containerManager, - ContainerId containerID, ContainerState finalState, int timeOutMax) - throws InterruptedException, YarnException, IOException { + public static void waitForContainerState( + ContainerManagementProtocol containerManager, ContainerId containerID, + List<ContainerState> finalStates, int timeOutMax) + throws InterruptedException, YarnException, IOException { List list = new ArrayList(); list.add(containerID); GetContainerStatusesRequest request = GetContainerStatusesRequest.newInstance(list); ContainerStatus containerStatus = null; + HashSet<ContainerState> fStates = + new HashSet<>(finalStates); int timeoutSecs = 0; do { Thread.sleep(2000); containerStatus = containerManager.getContainerStatuses(request) .getContainerStatuses().get(0); - LOG.info("Waiting for container to get into state " + finalState + LOG.info("Waiting for container to get into one of states " + fStates + ". Current state is " + containerStatus.getState()); timeoutSecs += 2; - } while (!containerStatus.getState().equals(finalState) + } while (!fStates.contains(containerStatus.getState()) && timeoutSecs < timeOutMax); LOG.info("Container state is " + containerStatus.getState()); - Assert.assertEquals("ContainerState is not correct (timedout)", - finalState, containerStatus.getState()); + Assert.assertTrue("ContainerState is not correct (timedout)", + fStates.contains(containerStatus.getState())); } - static void waitForApplicationState(ContainerManagerImpl containerManager, - ApplicationId appID, ApplicationState finalState) + public static void waitForApplicationState( + ContainerManagerImpl containerManager, ApplicationId appID, + ApplicationState finalState) throws InterruptedException { // Wait for app-finish Application app = @@ -344,7 +359,16 @@ public static void waitForNMContainerState(ContainerManagerImpl public static void waitForNMContainerState(ContainerManagerImpl containerManager, ContainerId containerID, org.apache.hadoop.yarn.server.nodemanager.containermanager - .container.ContainerState finalState, int timeOutMax) + .container.ContainerState finalState, int timeOutMax) + throws InterruptedException, YarnException, IOException { + waitForNMContainerState(containerManager, containerID, + Arrays.asList(finalState), timeOutMax); + } + + public static void waitForNMContainerState(ContainerManagerImpl + containerManager, ContainerId containerID, + List<org.apache.hadoop.yarn.server.nodemanager.containermanager + .container.ContainerState> finalStates, int timeOutMax) throws InterruptedException, YarnException, IOException { Container container = null; org.apache.hadoop.yarn.server.nodemanager @@ -358,15 +382,15 @@ public static void waitForNMContainerState(ContainerManagerImpl currentState = container.getContainerState(); } if (currentState != null) { - LOG.info("Waiting for NM container to get into state " + finalState - + ". Current state is " + currentState); + LOG.info("Waiting for NM container to get into one of the following " + + "states: " + finalStates + ". 
Current state is " + currentState); } timeoutSecs += 2; - } while (!currentState.equals(finalState) + } while (!finalStates.contains(currentState) && timeoutSecs++ < timeOutMax); LOG.info("Container state is " + currentState); - Assert.assertEquals("ContainerState is not correct (timedout)", - finalState, currentState); + Assert.assertTrue("ContainerState is not correct (timedout)", + finalStates.contains(currentState)); } public static Token createContainerToken(ContainerId cId, long rmIdentifier, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 2d2c294..eb30c5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -95,14 +95,16 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler; -import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; + import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.Before; import org.junit.Test; @@ -552,6 +554,35 @@ protected void authorizeGetAndStopContainerRequest( throw new YarnException("Reject this container"); } } + @Override + protected ContainerScheduler createContainerScheduler(Context context) { + return new ContainerScheduler(context, dispatcher, metrics){ + @Override + public ContainersMonitor getContainersMonitor() { + return new ContainersMonitorImpl(null, null, null) { + @Override + public float getVmemRatio() { + return 2.0f; + } + + @Override + public long getVmemAllocatedForContainers() { + return 20480; + } + + @Override + public long getPmemAllocatedForContainers() { + return 10240; + } + + @Override + public long getVCoresAllocatedForContainers() { + return 4; + } + }; + } + }; + } }; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRegression.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRegression.java deleted file mode 100644 index 71af76f..0000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRegression.java +++ /dev/null @@ -1,84 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.yarn.server.nodemanager.containermanager; - -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.UnsupportedFileSystemException; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.security.NMTokenIdentifier; -import org.apache.hadoop.yarn.server.nodemanager.DeletionService; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing.QueuingContainerManagerImpl; - -/** - * Test class that invokes all test cases of {@link TestContainerManager} while - * using the {@link QueuingContainerManagerImpl}. The goal is to assert that - * no regression is introduced in the existing cases when no queuing of tasks at - * the NMs is involved. 
- */ -public class TestContainerManagerRegression extends TestContainerManager { - - public TestContainerManagerRegression() - throws UnsupportedFileSystemException { - super(); - } - - static { - LOG = LogFactory.getLog(TestContainerManagerRegression.class); - } - - @Override - protected ContainerManagerImpl createContainerManager( - DeletionService delSrvc) { - return new QueuingContainerManagerImpl(context, exec, delSrvc, - nodeStatusUpdater, metrics, dirsHandler) { - @Override - public void - setBlockNewContainerRequests(boolean blockNewContainerRequests) { - // do nothing - } - - @Override - protected UserGroupInformation getRemoteUgi() throws YarnException { - ApplicationId appId = ApplicationId.newInstance(0, 0); - ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance( - appId, 1); - UserGroupInformation ugi = UserGroupInformation.createRemoteUser( - appAttemptId.toString()); - ugi.addTokenIdentifier(new NMTokenIdentifier(appAttemptId, context - .getNodeId(), user, context.getNMTokenSecretManager() - .getCurrentKey().getKeyId())); - return ugi; - } - - @Override - protected void authorizeGetAndStopContainerRequest( - ContainerId containerId, Container container, boolean stopRequest, - NMTokenIdentifier identifier) throws YarnException { - if (container == null || container.getUser().equals("Fail")) { - throw new YarnException("Reject this container"); - } - } - }; - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 766a1f9..33f4609 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -27,6 +27,7 @@ import static org.mockito.Mockito.reset; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.mockito.Mockito.atLeastOnce; import java.io.IOException; import java.net.URISyntaxException; @@ -90,6 +91,11 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerMetrics; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; + + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerSchedulerEventType; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService; @@ -143,7 +149,7 @@ public void testLocalizationLaunch() throws Exception { Map> localPaths = wc.localizeResources(); // all resources should be localized - assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + 
assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); assertNotNull(wc.c.getLocalizedResources()); for (Entry> loc : wc.c.getLocalizedResources() .entrySet()) { @@ -421,7 +427,7 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerKilled() wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); wc.initContainer(); wc.localizeResources(); - assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); wc.killContainer(); assertEquals(ContainerState.KILLING, wc.c.getContainerState()); @@ -452,7 +458,7 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerSuccess() wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); wc.initContainer(); wc.localizeResources(); - assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); wc.killContainer(); assertEquals(ContainerState.KILLING, wc.c.getContainerState()); wc.containerSuccessful(); @@ -480,7 +486,7 @@ public void testKillOnLocalizedWhenContainerNotLaunchedContainerFailure() wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); wc.initContainer(); wc.localizeResources(); - assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); wc.killContainer(); assertEquals(ContainerState.KILLING, wc.c.getContainerState()); wc.containerFailed(ExitCode.FORCE_KILLED.getExitCode()); @@ -507,7 +513,7 @@ public void testKillOnLocalizedWhenContainerLaunched() throws Exception { wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); wc.initContainer(); wc.localizeResources(); - assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + assertEquals(ContainerState.SCHEDULED, wc.c.getContainerState()); ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); launcher.call(); wc.drainDispatcherEvents(); @@ -764,7 +770,7 @@ private void verifyCleanupCall(WrappedContainer wc) throws Exception { new ResourcesReleasedMatcher(wc.localResources, EnumSet.of( LocalResourceVisibility.PUBLIC, LocalResourceVisibility.PRIVATE, LocalResourceVisibility.APPLICATION)); - verify(wc.localizerBus).handle(argThat(matchesReq)); + verify(wc.localizerBus, atLeastOnce()).handle(argThat(matchesReq)); } private void verifyOutofBandHeartBeat(WrappedContainer wc) { @@ -890,6 +896,7 @@ public boolean matches(Object o) { final EventHandler auxBus; final EventHandler appBus; final EventHandler LogBus; + final EventHandler schedBus; final ContainersLauncher launcher; final ContainerLaunchContext ctxt; @@ -927,9 +934,16 @@ public boolean matches(Object o) { auxBus = mock(EventHandler.class); appBus = mock(EventHandler.class); LogBus = mock(EventHandler.class); + schedBus = new ContainerScheduler(context, dispatcher, metrics, 0) { + @Override + protected void scheduleContainer(Container container) { + container.sendLaunchEvent(); + } + }; dispatcher.register(LocalizationEventType.class, localizerBus); dispatcher.register(ContainersLauncherEventType.class, launcherBus); dispatcher.register(ContainersMonitorEventType.class, monitorBus); + dispatcher.register(ContainerSchedulerEventType.class, schedBus); dispatcher.register(AuxServicesEventType.class, auxBus); dispatcher.register(ApplicationEventType.class, appBus); dispatcher.register(LogHandlerEventType.class, LogBus); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/TestQueuingContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java similarity index 59% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/TestQueuingContainerManager.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java index caebef7..24e388f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/queuing/TestQueuingContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java @@ -16,15 +16,15 @@ * limitations under the License. */ -package org.apache.hadoop.yarn.server.nodemanager.containermanager.queuing; +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; @@ -40,35 +40,41 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.NMTokenIdentifier; +import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.junit.Assert; import org.junit.Test; +import static org.mockito.Mockito.spy; + /** - * Class for testing the {@link QueuingContainerManagerImpl}. + * Tests to verify that the {@link ContainerScheduler} is able to queue and + * make room for containers. 
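+ * Queued containers surface to clients as SCHEDULED, and the number of + * queued OPPORTUNISTIC containers is capped in setup() below through + * YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH.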
*/ -public class TestQueuingContainerManager extends BaseContainerManagerTest { - public TestQueuingContainerManager() throws UnsupportedFileSystemException { +public class TestContainerSchedulerQueuing extends BaseContainerManagerTest { + public TestContainerSchedulerQueuing() throws UnsupportedFileSystemException { super(); } static { - LOG = LogFactory.getLog(TestQueuingContainerManager.class); + LOG = LogFactory.getLog(TestContainerSchedulerQueuing.class); } - boolean shouldDeleteWait = false; + private boolean delayContainers = true; @Override protected ContainerManagerImpl createContainerManager( DeletionService delSrvc) { - return new QueuingContainerManagerImpl(context, exec, delSrvc, + return new ContainerManagerImpl(context, exec, delSrvc, nodeStatusUpdater, metrics, dirsHandler) { @Override public void @@ -117,33 +123,29 @@ public long getVCoresAllocatedForContainers() { } @Override - protected DeletionService createDeletionService() { - return new DeletionService(exec) { + protected ContainerExecutor createContainerExecutor() { + DefaultContainerExecutor exec = new DefaultContainerExecutor() { @Override - public void delete(String user, Path subDir, Path... baseDirs) { - // Don't do any deletions. - if (shouldDeleteWait) { + public int launchContainer(ContainerStartContext ctx) throws IOException { + if (delayContainers) { try { Thread.sleep(10000); - LOG.info("\n\nSleeping Pseudo delete : user - " + user + ", " + - "subDir - " + subDir + ", " + - "baseDirs - " + Arrays.asList(baseDirs)); } catch (InterruptedException e) { - e.printStackTrace(); + // Nothing.. } - } else { - LOG.info("\n\nPseudo delete : user - " + user + ", " + - "subDir - " + subDir + ", " + - "baseDirs - " + Arrays.asList(baseDirs)); } + return super.launchContainer(ctx); } }; + exec.setConf(conf); + return spy(exec); } @Override public void setup() throws IOException { + conf.setInt( + YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10); super.setup(); - shouldDeleteWait = false; } /** @@ -152,7 +154,6 @@ public void setup() throws IOException { */ @Test public void testStartMultipleContainers() throws Exception { - shouldDeleteWait = true; containerManager.start(); ContainerLaunchContext containerLaunchContext = @@ -209,7 +210,6 @@ public void testStartMultipleContainers() throws Exception { */ @Test public void testQueueMultipleContainers() throws Exception { - shouldDeleteWait = true; containerManager.start(); ContainerLaunchContext containerLaunchContext = @@ -248,17 +248,18 @@ public void testQueueMultipleContainers() throws Exception { .getContainerStatuses(statRequest).getContainerStatuses(); for (ContainerStatus status : containerStatuses) { Assert.assertEquals( - org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, + org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED, status.getState()); } + ContainerScheduler containerScheduler = + containerManager.getContainerScheduler(); // Ensure both containers are properly queued. 
- Assert.assertEquals(2, containerManager.getContext().getQueuingContext() - .getQueuedContainers().size()); - Assert.assertEquals(1, ((QueuingContainerManagerImpl) containerManager) - .getNumQueuedGuaranteedContainers()); - Assert.assertEquals(1, ((QueuingContainerManagerImpl) containerManager) - .getNumQueuedOpportunisticContainers()); + Assert.assertEquals(2, containerScheduler.getNumQueuedContainers()); + Assert.assertEquals(1, + containerScheduler.getNumQueuedGuaranteedContainers()); + Assert.assertEquals(1, + containerScheduler.getNumQueuedOpportunisticContainers()); } /** @@ -268,7 +269,6 @@ public void testQueueMultipleContainers() throws Exception { */ @Test public void testStartAndQueueMultipleContainers() throws Exception { - shouldDeleteWait = true; containerManager.start(); ContainerLaunchContext containerLaunchContext = @@ -319,18 +319,19 @@ public void testStartAndQueueMultipleContainers() throws Exception { status.getState()); } else { Assert.assertEquals( - org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, + org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED, status.getState()); } } + ContainerScheduler containerScheduler = + containerManager.getContainerScheduler(); // Ensure two containers are properly queued. - Assert.assertEquals(2, containerManager.getContext().getQueuingContext() - .getQueuedContainers().size()); - Assert.assertEquals(0, ((QueuingContainerManagerImpl) containerManager) - .getNumQueuedGuaranteedContainers()); - Assert.assertEquals(2, ((QueuingContainerManagerImpl) containerManager) - .getNumQueuedOpportunisticContainers()); + Assert.assertEquals(2, containerScheduler.getNumQueuedContainers()); + Assert.assertEquals(0, + containerScheduler.getNumQueuedGuaranteedContainers()); + Assert.assertEquals(2, + containerScheduler.getNumQueuedOpportunisticContainers()); } /** @@ -344,7 +345,6 @@ public void testStartAndQueueMultipleContainers() throws Exception { */ @Test public void testKillOpportunisticForGuaranteedContainer() throws Exception { - shouldDeleteWait = true; containerManager.start(); ContainerLaunchContext containerLaunchContext = @@ -393,11 +393,11 @@ public void testKillOpportunisticForGuaranteedContainer() throws Exception { .getContainerStatuses(statRequest).getContainerStatuses(); for (ContainerStatus status : containerStatuses) { if (status.getContainerId().equals(createContainerId(0))) { - Assert.assertTrue(status.getDiagnostics() - .contains("Container killed by the ApplicationMaster")); + Assert.assertTrue(status.getDiagnostics().contains( + "Container Killed to make room for Guaranteed Container")); } else if (status.getContainerId().equals(createContainerId(1))) { Assert.assertEquals( - org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, + org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED, status.getState()); } else if (status.getContainerId().equals(createContainerId(2))) { Assert.assertEquals( @@ -421,13 +421,203 @@ public void testKillOpportunisticForGuaranteedContainer() throws Exception { } /** + * 1. Submit a long-running GUARANTEED container to hog all NM resources. + * 2. Submit 6 OPPORTUNISTIC containers, all of which will be queued. + * 3. Update the queue limit to 2. + * 4. Ensure only 2 containers remain in the queue, and 4 are de-queued. 
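+ * Step 3 boils down to the scheduler calls exercised in the test body: + * ContainerQueuingLimit limit = ContainerQueuingLimit.newInstance(); + * limit.setMaxQueueLength(2); + * containerManager.getContainerScheduler().updateQueuingLimit(limit);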
+ * @throws Exception + */ + @Test + public void testQueueShedding() throws Exception { + containerManager.start(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + containerLaunchContext.setCommands(Arrays.asList("sleep 100")); + + List list = new ArrayList<>(); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(2048, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.GUARANTEED))); + + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + list = new ArrayList<>(); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(3), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(4), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(5), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(6), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + + allRequests = StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + ContainerScheduler containerScheduler = + containerManager.getContainerScheduler(); + // Ensure all containers are properly queued. 
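+ // startContainers() only dispatches the requests, so queuing happens + // asynchronously; poll for up to 30 x 100ms before asserting.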
+ int numTries = 30; + while ((containerScheduler.getNumQueuedContainers() < 6) && + (numTries-- > 0)) { + Thread.sleep(100); + } + Assert.assertEquals(6, containerScheduler.getNumQueuedContainers()); + + ContainerQueuingLimit containerQueuingLimit = ContainerQueuingLimit + .newInstance(); + containerQueuingLimit.setMaxQueueLength(2); + containerScheduler.updateQueuingLimit(containerQueuingLimit); + numTries = 30; + while ((containerScheduler.getNumQueuedContainers() > 2) && + (numTries-- > 0)) { + Thread.sleep(100); + } + Assert.assertEquals(2, containerScheduler.getNumQueuedContainers()); + + List<ContainerId> statList = new ArrayList<>(); + for (int i = 1; i < 7; i++) { + statList.add(createContainerId(i)); + } + GetContainerStatusesRequest statRequest = + GetContainerStatusesRequest.newInstance(statList); + List<ContainerStatus> containerStatuses = containerManager + .getContainerStatuses(statRequest).getContainerStatuses(); + + int deQueuedContainers = 0; + int numQueuedOppContainers = 0; + for (ContainerStatus status : containerStatuses) { + if (status.getExecutionType() == ExecutionType.OPPORTUNISTIC) { + if (status.getDiagnostics().contains( + "Container De-queued to meet NM queuing limits")) { + deQueuedContainers++; + } + if (status.getState() == + org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) { + numQueuedOppContainers++; + } + } + } + Assert.assertEquals(4, deQueuedContainers); + Assert.assertEquals(2, numQueuedOppContainers); + } + + /** + * 1. Submit a long-running GUARANTEED container to hog all NM resources. + * 2. Submit 2 OPPORTUNISTIC containers, both of which will be queued. + * 3. Send a stop-container request to one of the queued containers. + * 4. Ensure the container is removed from the queue. + * @throws Exception + */ + @Test + public void testContainerDeQueuedAfterAMKill() throws Exception { + containerManager.start(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + containerLaunchContext.setCommands(Arrays.asList("sleep 100")); + + List<StartContainerRequest> list = new ArrayList<>(); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(2048, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.GUARANTEED))); + + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + list = new ArrayList<>(); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(2), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + + allRequests = StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + ContainerScheduler containerScheduler = + containerManager.getContainerScheduler(); + // Ensure both containers are properly queued. 
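+ // As above, queuing is asynchronous, so poll briefly before asserting.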
+ int numTries = 30; + while ((containerScheduler.getNumQueuedContainers() < 2) && + (numTries-- > 0)) { + Thread.sleep(100); + } + Assert.assertEquals(2, containerScheduler.getNumQueuedContainers()); + + containerManager.stopContainers( + StopContainersRequest.newInstance(Arrays.asList(createContainerId(2)))); + + numTries = 30; + while ((containerScheduler.getNumQueuedContainers() > 1) && + (numTries-- > 0)) { + Thread.sleep(100); + } + Assert.assertEquals(1, containerScheduler.getNumQueuedContainers()); + } + + /** * Submit three OPPORTUNISTIC containers that can run concurrently, and one * GUARANTEED that needs to kill two of the OPPORTUNISTIC for it to run. * @throws Exception */ @Test public void testKillMultipleOpportunisticContainers() throws Exception { - shouldDeleteWait = true; containerManager.start(); ContainerLaunchContext containerLaunchContext = @@ -455,6 +645,12 @@ public void testKillMultipleOpportunisticContainers() throws Exception { user, BuilderUtils.newResource(512, 1), context.getContainerTokenSecretManager(), null, ExecutionType.OPPORTUNISTIC))); + + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + list = new ArrayList<>(); list.add(StartContainerRequest.newInstance( containerLaunchContext, createContainerToken(createContainerId(3), DUMMY_RM_IDENTIFIER, @@ -463,10 +659,81 @@ public void testKillMultipleOpportunisticContainers() throws Exception { context.getContainerTokenSecretManager(), null, ExecutionType.GUARANTEED))); + allRequests = StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + BaseContainerManagerTest.waitForNMContainerState( + containerManager, createContainerId(0), + Arrays.asList(ContainerState.DONE, + ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL), 40); + Thread.sleep(5000); + + // Get container statuses. Two of the three OPPORTUNISTIC containers + // should have been killed to make room for the GUARANTEED container. + int killedContainers = 0; + List<ContainerId> statList = new ArrayList<>(); + for (int i = 0; i < 4; i++) { + statList.add(createContainerId(i)); + } + GetContainerStatusesRequest statRequest = + GetContainerStatusesRequest.newInstance(statList); + List<ContainerStatus> containerStatuses = containerManager + .getContainerStatuses(statRequest).getContainerStatuses(); + for (ContainerStatus status : containerStatuses) { + if (status.getDiagnostics().contains( + "Container Killed to make room for Guaranteed Container")) { + killedContainers++; + } + System.out.println("\nStatus : [" + status + "]\n"); + } + + Assert.assertEquals(2, killedContainers); + } + + /** + * Submit four OPPORTUNISTIC containers that can run concurrently, and then + * two GUARANTEED containers that need to kill exactly two of the + * OPPORTUNISTIC ones in order to run. Make sure only two are killed. 
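+ * All containers here request identical resources (512MB, 1 vcore), so + * each GUARANTEED container should displace exactly one OPPORTUNISTIC one.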
+ * @throws Exception + */ + @Test + public void testKillOnlyRequiredOpportunisticContainers() throws Exception { + containerManager.start(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + + List list = new ArrayList<>(); + // Fill NM with Opportunistic containers + for (int i = 0; i < 4; i++) { + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(i), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + } + StartContainersRequest allRequests = StartContainersRequest.newInstance(list); containerManager.startContainers(allRequests); + list = new ArrayList<>(); + // Now ask for two Guaranteed containers + for (int i = 4; i < 6; i++) { + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(i), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(512, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.GUARANTEED))); + } + + allRequests = StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + BaseContainerManagerTest.waitForNMContainerState(containerManager, createContainerId(0), ContainerState.DONE, 40); Thread.sleep(5000); @@ -475,7 +742,7 @@ public void testKillMultipleOpportunisticContainers() throws Exception { // should be queued and container 2 should be running. int killedContainers = 0; List statList = new ArrayList(); - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 6; i++) { statList.add(createContainerId(i)); } GetContainerStatusesRequest statRequest = @@ -484,7 +751,7 @@ public void testKillMultipleOpportunisticContainers() throws Exception { .getContainerStatuses(statRequest).getContainerStatuses(); for (ContainerStatus status : containerStatuses) { if (status.getDiagnostics().contains( - "Container killed by the ApplicationMaster")) { + "Container Killed to make room for Guaranteed Container")) { killedContainers++; } System.out.println("\nStatus : [" + status + "]\n"); @@ -500,7 +767,6 @@ public void testKillMultipleOpportunisticContainers() throws Exception { */ @Test public void testStopQueuedContainer() throws Exception { - shouldDeleteWait = true; containerManager.start(); ContainerLaunchContext containerLaunchContext = @@ -551,7 +817,7 @@ public void testStopQueuedContainer() throws Exception { org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) { runningContainersNo++; } else if (status.getState() == - org.apache.hadoop.yarn.api.records.ContainerState.QUEUED) { + org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) { queuedContainersNo++; } System.out.println("\nStatus : [" + status + "]\n"); @@ -572,23 +838,35 @@ public void testStopQueuedContainer() throws Exception { for (int i = 0; i < 3; i++) { statList.add(createContainerId(i)); } + statRequest = GetContainerStatusesRequest.newInstance(statList); - containerStatuses = containerManager.getContainerStatuses(statRequest) - .getContainerStatuses(); - for (ContainerStatus status : containerStatuses) { - if (status.getContainerId().equals(createContainerId(0))) { - Assert.assertEquals( - org.apache.hadoop.yarn.api.records.ContainerState.RUNNING, - status.getState()); - } else if (status.getContainerId().equals(createContainerId(1))) { - Assert.assertTrue(status.getDiagnostics().contains( - "Queued container request 
removed")); - } else if (status.getContainerId().equals(createContainerId(2))) { - Assert.assertEquals( - org.apache.hadoop.yarn.api.records.ContainerState.QUEUED, - status.getState()); + HashMap + map = new HashMap<>(); + for (int i=0; i < 10; i++) { + containerStatuses = containerManager.getContainerStatuses(statRequest) + .getContainerStatuses(); + for (ContainerStatus status : containerStatuses) { + System.out.println("\nStatus : [" + status + "]\n"); + map.put(status.getState(), status); + if (map.containsKey( + org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) && + map.containsKey( + org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) && + map.containsKey( + org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE)) { + break; + } + Thread.sleep(1000); } - System.out.println("\nStatus : [" + status + "]\n"); } + Assert.assertEquals(createContainerId(0), + map.get(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING) + .getContainerId()); + Assert.assertEquals(createContainerId(1), + map.get(org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE) + .getContainerId()); + Assert.assertEquals(createContainerId(2), + map.get(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED) + .getContainerId()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestDistributedScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestDistributedScheduler.java index 8f1ae7f..736dc31 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestDistributedScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/scheduler/TestDistributedScheduler.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterDistributedSchedulingAMResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.RequestInterceptor; @@ -74,7 +75,8 @@ public void testDistributedScheduler() throws Exception { RequestInterceptor finalReqIntcptr = setup(conf, distributedScheduler); registerAM(distributedScheduler, finalReqIntcptr, Arrays.asList( - NodeId.newInstance("a", 1), NodeId.newInstance("b", 2))); + RemoteNode.newInstance(NodeId.newInstance("a", 1), "http://a:1"), + RemoteNode.newInstance(NodeId.newInstance("b", 2), "http://b:2"))); final AtomicBoolean flipFlag = new AtomicBoolean(true); Mockito.when( @@ -87,10 +89,16 @@ public DistributedSchedulingAllocateResponse answer( flipFlag.set(!flipFlag.get()); if (flipFlag.get()) { return createAllocateResponse(Arrays.asList( - NodeId.newInstance("c", 3), NodeId.newInstance("d", 4))); + RemoteNode.newInstance( + NodeId.newInstance("c", 3), "http://c:3"), + RemoteNode.newInstance( + NodeId.newInstance("d", 4), "http://d:4"))); } else { return createAllocateResponse(Arrays.asList( - NodeId.newInstance("d", 4), 
NodeId.newInstance("c", 3))); + RemoteNode.newInstance( + NodeId.newInstance("d", 4), "http://d:4"), + RemoteNode.newInstance( + NodeId.newInstance("c", 3), "http://c:3"))); } } }); @@ -164,7 +172,7 @@ public DistributedSchedulingAllocateResponse answer( } private void registerAM(DistributedScheduler distributedScheduler, - RequestInterceptor finalReqIntcptr, List<NodeId> nodeList) + RequestInterceptor finalReqIntcptr, List<RemoteNode> nodeList) throws Exception { RegisterDistributedSchedulingAMResponse distSchedRegisterResponse = Records.newRecord(RegisterDistributedSchedulingAMResponse.class); @@ -208,7 +216,7 @@ public void setBytes(ByteBuffer bytes) {} }; nmContainerTokenSecretManager.setMasterKey(mKey); OpportunisticContainerAllocator containerAllocator = - new OpportunisticContainerAllocator(nmContainerTokenSecretManager, 77); + new OpportunisticContainerAllocator(nmContainerTokenSecretManager); NMTokenSecretManagerInNM nmTokenSecretManagerInNM = new NMTokenSecretManagerInNM(); @@ -236,7 +244,7 @@ private ResourceRequest createResourceRequest(ExecutionType execType, } private DistributedSchedulingAllocateResponse createAllocateResponse( - List<NodeId> nodes) { + List<RemoteNode> nodes) { DistributedSchedulingAllocateResponse distSchedAllocateResponse = Records.newRecord(DistributedSchedulingAllocateResponse.class); distSchedAllocateResponse diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index 5f1aab9..92966ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -209,4 +209,19 @@ public boolean canRollback() { public void commitUpgrade() { } + + @Override + public boolean isMarkedForKilling() { + return false; + } + + @Override + public void sendLaunchEvent() { + + } + + @Override + public void sendKillEvent(int exitStatus, String description) { + + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java index 2cc77ae..0af520f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java @@ -507,7 +507,7 @@ public void verifyContainersInfoXML(NodeList nodes, Container cont) public void verifyNodeContainerInfo(JSONObject info, Container cont) throws JSONException, Exception { - assertEquals("incorrect number of elements", 10, info.length()); + assertEquals("incorrect number of elements", 11, info.length()); verifyNodeContainerInfoGeneric(cont, info.getString("id"), info.getString("state"), info.getString("user"), diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 108c327..c2b0012 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -400,7 +400,6 @@ public AllocateResponse allocate(AllocateRequest request) ApplicationAttemptId appAttemptId = amrmTokenIdentifier.getApplicationAttemptId(); - ApplicationId applicationId = appAttemptId.getApplicationId(); this.amLivelinessMonitor.receivedPing(appAttemptId); @@ -417,8 +416,10 @@ public AllocateResponse allocate(AllocateRequest request) AllocateResponse lastResponse = lock.getAllocateResponse(); if (!hasApplicationMasterRegistered(appAttemptId)) { String message = - "AM is not registered for known application attempt: " + appAttemptId - + " or RM had restarted after AM registered . AM should re-register."; + "AM is not registered for known application attempt: " + + appAttemptId + + " or RM had restarted after AM registered." + + " AM should re-register."; throw new ApplicationMasterNotRegisteredException(message); } @@ -433,178 +434,10 @@ public AllocateResponse allocate(AllocateRequest request) throw new InvalidApplicationMasterRequestException(message); } - //filter illegal progress values - float filteredProgress = request.getProgress(); - if (Float.isNaN(filteredProgress) || filteredProgress == Float.NEGATIVE_INFINITY - || filteredProgress < 0) { - request.setProgress(0); - } else if (filteredProgress > 1 || filteredProgress == Float.POSITIVE_INFINITY) { - request.setProgress(1); - } - - // Send the status update to the appAttempt. - this.rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptStatusupdateEvent(appAttemptId, request - .getProgress())); - - List ask = request.getAskList(); - List release = request.getReleaseList(); - - ResourceBlacklistRequest blacklistRequest = - request.getResourceBlacklistRequest(); - List blacklistAdditions = - (blacklistRequest != null) ? - blacklistRequest.getBlacklistAdditions() : Collections.EMPTY_LIST; - List blacklistRemovals = - (blacklistRequest != null) ? 
- blacklistRequest.getBlacklistRemovals() : Collections.EMPTY_LIST; - RMApp app = - this.rmContext.getRMApps().get(applicationId); - - // set label expression for Resource Requests if resourceName=ANY - ApplicationSubmissionContext asc = app.getApplicationSubmissionContext(); - for (ResourceRequest req : ask) { - if (null == req.getNodeLabelExpression() - && ResourceRequest.ANY.equals(req.getResourceName())) { - req.setNodeLabelExpression(asc.getNodeLabelExpression()); - } - } - - Resource maximumCapacity = rScheduler.getMaximumResourceCapability(); - - // sanity check - try { - RMServerUtils.normalizeAndValidateRequests(ask, - maximumCapacity, app.getQueue(), - rScheduler, rmContext); - } catch (InvalidResourceRequestException e) { - LOG.warn("Invalid resource ask by application " + appAttemptId, e); - throw e; - } - - try { - RMServerUtils.validateBlacklistRequest(blacklistRequest); - } catch (InvalidResourceBlacklistRequestException e) { - LOG.warn("Invalid blacklist request by application " + appAttemptId, e); - throw e; - } - - // In the case of work-preserving AM restart, it's possible for the - // AM to release containers from the earlier attempt. - if (!app.getApplicationSubmissionContext() - .getKeepContainersAcrossApplicationAttempts()) { - try { - RMServerUtils.validateContainerReleaseRequest(release, appAttemptId); - } catch (InvalidContainerReleaseException e) { - LOG.warn("Invalid container release by application " + appAttemptId, - e); - throw e; - } - } - - // Split Update Resource Requests into increase and decrease. - // No Exceptions are thrown here. All update errors are aggregated - // and returned to the AM. - List increaseResourceReqs = new ArrayList<>(); - List decreaseResourceReqs = new ArrayList<>(); - List updateContainerErrors = - RMServerUtils.validateAndSplitUpdateResourceRequests(rmContext, - request, maximumCapacity, increaseResourceReqs, - decreaseResourceReqs); - - // Send new requests to appAttempt. - Allocation allocation; - RMAppAttemptState state = - app.getRMAppAttempt(appAttemptId).getAppAttemptState(); - if (state.equals(RMAppAttemptState.FINAL_SAVING) || - state.equals(RMAppAttemptState.FINISHING) || - app.isAppFinalStateStored()) { - LOG.warn(appAttemptId + " is in " + state + - " state, ignore container allocate request."); - allocation = EMPTY_ALLOCATION; - } else { - allocation = - this.rScheduler.allocate(appAttemptId, ask, release, - blacklistAdditions, blacklistRemovals, - increaseResourceReqs, decreaseResourceReqs); - } - - if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) { - LOG.info("blacklist are updated in Scheduler." 
+ - "blacklistAdditions: " + blacklistAdditions + ", " + - "blacklistRemovals: " + blacklistRemovals); - } - RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId); - AllocateResponse allocateResponse = + AllocateResponse response = recordFactory.newRecordInstance(AllocateResponse.class); - if (!allocation.getContainers().isEmpty()) { - allocateResponse.setNMTokens(allocation.getNMTokens()); - } - - // Notify the AM of container update errors - if (!updateContainerErrors.isEmpty()) { - allocateResponse.setUpdateErrors(updateContainerErrors); - } - // update the response with the deltas of node status changes - List updatedNodes = new ArrayList(); - if(app.pullRMNodeUpdates(updatedNodes) > 0) { - List updatedNodeReports = new ArrayList(); - for(RMNode rmNode: updatedNodes) { - SchedulerNodeReport schedulerNodeReport = - rScheduler.getNodeReport(rmNode.getNodeID()); - Resource used = BuilderUtils.newResource(0, 0); - int numContainers = 0; - if (schedulerNodeReport != null) { - used = schedulerNodeReport.getUsedResource(); - numContainers = schedulerNodeReport.getNumContainers(); - } - NodeId nodeId = rmNode.getNodeID(); - NodeReport report = - BuilderUtils.newNodeReport(nodeId, rmNode.getState(), - rmNode.getHttpAddress(), rmNode.getRackName(), used, - rmNode.getTotalCapability(), numContainers, - rmNode.getHealthReport(), rmNode.getLastHealthReportTime(), - rmNode.getNodeLabels()); - - updatedNodeReports.add(report); - } - allocateResponse.setUpdatedNodes(updatedNodeReports); - } - - allocateResponse.setAllocatedContainers(allocation.getContainers()); - allocateResponse.setCompletedContainersStatuses(appAttempt - .pullJustFinishedContainers()); - allocateResponse.setResponseId(lastResponse.getResponseId() + 1); - allocateResponse.setAvailableResources(allocation.getResourceLimit()); - - // Handling increased/decreased containers - List updatedContainers = new ArrayList<>(); - if (allocation.getIncreasedContainers() != null) { - for (Container c : allocation.getIncreasedContainers()) { - updatedContainers.add( - UpdatedContainer.newInstance( - ContainerUpdateType.INCREASE_RESOURCE, c)); - } - } - if (allocation.getDecreasedContainers() != null) { - for (Container c : allocation.getDecreasedContainers()) { - updatedContainers.add( - UpdatedContainer.newInstance( - ContainerUpdateType.DECREASE_RESOURCE, c)); - } - } - - allocateResponse.setUpdatedContainers(updatedContainers); - - allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes()); - - // add preemption to the allocateResponse message (if any) - allocateResponse - .setPreemptionMessage(generatePreemptionMessage(allocation)); - - // Set application priority - allocateResponse.setApplicationPriority(app - .getApplicationPriority()); + allocateInternal(amrmTokenIdentifier.getApplicationAttemptId(), + request, response); // update AMRMToken if the token is rolled-up MasterKeyData nextMasterKey = @@ -612,21 +445,24 @@ public AllocateResponse allocate(AllocateRequest request) if (nextMasterKey != null && nextMasterKey.getMasterKey().getKeyId() != amrmTokenIdentifier - .getKeyId()) { + .getKeyId()) { + RMApp app = + this.rmContext.getRMApps().get(appAttemptId.getApplicationId()); + RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId); RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl)appAttempt; Token amrmToken = appAttempt.getAMRMToken(); if (nextMasterKey.getMasterKey().getKeyId() != appAttemptImpl.getAMRMTokenKeyId()) { LOG.info("The AMRMToken has been rolled-over. 
Send new AMRMToken back" - + " to application: " + applicationId); + + " to application: " + appAttemptId.getApplicationId()); amrmToken = rmContext.getAMRMTokenSecretManager() .createAndGetAMRMToken(appAttemptId); appAttemptImpl.setAMRMToken(amrmToken); } - allocateResponse.setAMRMToken(org.apache.hadoop.yarn.api.records.Token - .newInstance(amrmToken.getIdentifier(), amrmToken.getKind() - .toString(), amrmToken.getPassword(), amrmToken.getService() - .toString())); + response.setAMRMToken(org.apache.hadoop.yarn.api.records.Token + .newInstance(amrmToken.getIdentifier(), amrmToken.getKind() + .toString(), amrmToken.getPassword(), amrmToken.getService() + .toString())); } /* @@ -634,11 +470,220 @@ public AllocateResponse allocate(AllocateRequest request) * need to worry about unregister call occurring in between (which * removes the lock object). */ - lock.setAllocateResponse(allocateResponse); - return allocateResponse; + response.setResponseId(lastResponse.getResponseId() + 1); + lock.setAllocateResponse(response); + return response; } } + protected void allocateInternal(ApplicationAttemptId appAttemptId, + AllocateRequest request, AllocateResponse allocateResponse) + throws YarnException { + + //filter illegal progress values + float filteredProgress = request.getProgress(); + if (Float.isNaN(filteredProgress) || + filteredProgress == Float.NEGATIVE_INFINITY || + filteredProgress < 0) { + request.setProgress(0); + } else if (filteredProgress > 1 || + filteredProgress == Float.POSITIVE_INFINITY) { + request.setProgress(1); + } + + // Send the status update to the appAttempt. + this.rmContext.getDispatcher().getEventHandler().handle( + new RMAppAttemptStatusupdateEvent(appAttemptId, request + .getProgress())); + + List ask = request.getAskList(); + List release = request.getReleaseList(); + + ResourceBlacklistRequest blacklistRequest = + request.getResourceBlacklistRequest(); + List blacklistAdditions = + (blacklistRequest != null) ? + blacklistRequest.getBlacklistAdditions() : Collections.EMPTY_LIST; + List blacklistRemovals = + (blacklistRequest != null) ? + blacklistRequest.getBlacklistRemovals() : Collections.EMPTY_LIST; + RMApp app = + this.rmContext.getRMApps().get(appAttemptId.getApplicationId()); + + // set label expression for Resource Requests if resourceName=ANY + ApplicationSubmissionContext asc = app.getApplicationSubmissionContext(); + for (ResourceRequest req : ask) { + if (null == req.getNodeLabelExpression() + && ResourceRequest.ANY.equals(req.getResourceName())) { + req.setNodeLabelExpression(asc.getNodeLabelExpression()); + } + } + + Resource maximumCapacity = rScheduler.getMaximumResourceCapability(); + + // sanity check + try { + RMServerUtils.normalizeAndValidateRequests(ask, + maximumCapacity, app.getQueue(), + rScheduler, rmContext); + } catch (InvalidResourceRequestException e) { + LOG.warn("Invalid resource ask by application " + appAttemptId, e); + throw e; + } + + try { + RMServerUtils.validateBlacklistRequest(blacklistRequest); + } catch (InvalidResourceBlacklistRequestException e) { + LOG.warn("Invalid blacklist request by application " + appAttemptId, e); + throw e; + } + + // In the case of work-preserving AM restart, it's possible for the + // AM to release containers from the earlier attempt. 
+ if (!app.getApplicationSubmissionContext() + .getKeepContainersAcrossApplicationAttempts()) { + try { + RMServerUtils.validateContainerReleaseRequest(release, appAttemptId); + } catch (InvalidContainerReleaseException e) { + LOG.warn("Invalid container release by application " + appAttemptId, + e); + throw e; + } + } + + // Split Update Resource Requests into increase and decrease. + // No Exceptions are thrown here. All update errors are aggregated + // and returned to the AM. + List<UpdateContainerRequest> increaseResourceReqs = new ArrayList<>(); + List<UpdateContainerRequest> decreaseResourceReqs = new ArrayList<>(); + List<UpdateContainerError> updateContainerErrors = + RMServerUtils.validateAndSplitUpdateResourceRequests( + rmContext, request, maximumCapacity, + increaseResourceReqs, decreaseResourceReqs); + + // Send new requests to appAttempt. + Allocation allocation; + RMAppAttemptState state = + app.getRMAppAttempt(appAttemptId).getAppAttemptState(); + if (state.equals(RMAppAttemptState.FINAL_SAVING) || + state.equals(RMAppAttemptState.FINISHING) || + app.isAppFinalStateStored()) { + LOG.warn(appAttemptId + " is in " + state + + " state, ignore container allocate request."); + allocation = EMPTY_ALLOCATION; + } else { + allocation = + this.rScheduler.allocate(appAttemptId, ask, release, + blacklistAdditions, blacklistRemovals, + increaseResourceReqs, decreaseResourceReqs); + } + + if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) { + LOG.info("Blacklist is updated in Scheduler. " + + "blacklistAdditions: " + blacklistAdditions + ", " + + "blacklistRemovals: " + blacklistRemovals); + } + RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId); + + if (allocation.getNMTokens() != null && + !allocation.getNMTokens().isEmpty()) { + allocateResponse.setNMTokens(allocation.getNMTokens()); + } + + // Notify the AM of container update errors + addToUpdateContainerErrors(allocateResponse, updateContainerErrors); + + // update the response with the deltas of node status changes + List<RMNode> updatedNodes = new ArrayList<RMNode>(); + if(app.pullRMNodeUpdates(updatedNodes) > 0) { + List<NodeReport> updatedNodeReports = new ArrayList<NodeReport>(); + for(RMNode rmNode: updatedNodes) { + SchedulerNodeReport schedulerNodeReport = + rScheduler.getNodeReport(rmNode.getNodeID()); + Resource used = BuilderUtils.newResource(0, 0); + int numContainers = 0; + if (schedulerNodeReport != null) { + used = schedulerNodeReport.getUsedResource(); + numContainers = schedulerNodeReport.getNumContainers(); + } + NodeId nodeId = rmNode.getNodeID(); + NodeReport report = + BuilderUtils.newNodeReport(nodeId, rmNode.getState(), + rmNode.getHttpAddress(), rmNode.getRackName(), used, + rmNode.getTotalCapability(), numContainers, + rmNode.getHealthReport(), rmNode.getLastHealthReportTime(), + rmNode.getNodeLabels()); + + updatedNodeReports.add(report); + } + allocateResponse.setUpdatedNodes(updatedNodeReports); + } + + addToAllocatedContainers(allocateResponse, allocation.getContainers()); + + allocateResponse.setCompletedContainersStatuses(appAttempt + .pullJustFinishedContainers()); + allocateResponse.setAvailableResources(allocation.getResourceLimit()); + + // Handling increased containers + addToUpdatedContainers( + allocateResponse, ContainerUpdateType.INCREASE_RESOURCE, + allocation.getIncreasedContainers()); + + // Handling decreased containers + addToUpdatedContainers( + allocateResponse, ContainerUpdateType.DECREASE_RESOURCE, + allocation.getDecreasedContainers()); + + allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes()); + + // add preemption to the allocateResponse message (if any) + allocateResponse + .setPreemptionMessage(generatePreemptionMessage(allocation)); + + // Set application priority + allocateResponse + .setApplicationPriority(app + .getApplicationPriority()); + } + + protected void addToUpdateContainerErrors(AllocateResponse allocateResponse, + List<UpdateContainerError> updateContainerErrors) { + if (!updateContainerErrors.isEmpty()) { + if (allocateResponse.getUpdateErrors() != null + && !allocateResponse.getUpdateErrors().isEmpty()) { + updateContainerErrors = new ArrayList<>(updateContainerErrors); + updateContainerErrors.addAll(allocateResponse.getUpdateErrors()); + } + allocateResponse.setUpdateErrors(updateContainerErrors); + } + } + + protected void addToUpdatedContainers(AllocateResponse allocateResponse, + ContainerUpdateType updateType, List<Container> updatedContainers) { + if (updatedContainers != null && updatedContainers.size() > 0) { + ArrayList<UpdatedContainer> containersToSet = new ArrayList<>(); + if (allocateResponse.getUpdatedContainers() != null && + !allocateResponse.getUpdatedContainers().isEmpty()) { + containersToSet.addAll(allocateResponse.getUpdatedContainers()); + } + for (Container updatedContainer : updatedContainers) { + containersToSet.add( + UpdatedContainer.newInstance(updateType, updatedContainer)); + } + allocateResponse.setUpdatedContainers(containersToSet); + } + } + + protected void addToAllocatedContainers(AllocateResponse allocateResponse, + List<Container> allocatedContainers) { + if (allocateResponse.getAllocatedContainers() != null + && !allocateResponse.getAllocatedContainers().isEmpty()) { + allocatedContainers = new ArrayList<>(allocatedContainers); + allocatedContainers.addAll(allocateResponse.getAllocatedContainers()); + } + allocateResponse.setAllocatedContainers(allocatedContainers); + } + private PreemptionMessage generatePreemptionMessage(Allocation allocation){ PreemptionMessage pMsg = null; // assemble strict preemption request
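The hunk above completes a restructuring of ApplicationMasterService: allocate() keeps only the RPC-level concerns (liveness ping, registration check, response id, AMRMToken roll-over), while the actual request processing moves into the overridable allocateInternal(), and the response is assembled through addToUpdateContainerErrors/addToUpdatedContainers/addToAllocatedContainers, which merge into any values already present instead of overwriting them. That is what lets a subclass allocate some containers itself and then delegate the rest. A sketch of the pattern, assuming the protected hooks shown in this hunk and the (String, RMContext, YarnScheduler) constructor used elsewhere in the patch; the subclass and its allocateMyContainers() helper are hypothetical:

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;

class SketchAMService extends ApplicationMasterService {
  SketchAMService(RMContext rmContext, YarnScheduler scheduler) {
    super(SketchAMService.class.getName(), rmContext, scheduler);
  }

  @Override
  protected void allocateInternal(ApplicationAttemptId appAttemptId,
      AllocateRequest request, AllocateResponse response)
      throws YarnException {
    // Satisfy part of the ask through a custom path first...
    List<Container> mine = allocateMyContainers(request);
    // ...merge rather than set, so later additions are not lost...
    addToAllocatedContainers(response, mine);
    // ...then let the regular scheduler serve the remaining asks.
    super.allocateInternal(appAttemptId, request, response);
  }

  private List<Container> allocateMyContainers(AllocateRequest request) {
    return Collections.emptyList(); // placeholder for a custom allocator
  }
}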
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java index a473b14..708b481 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java @@ -24,9 +24,11 @@ import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.yarn.api.ApplicationMasterProtocolPB; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.server.api.DistributedSchedulingAMProtocol; import org.apache.hadoop.yarn.api.impl.pb.service.ApplicationMasterProtocolPBServiceImpl; @@ -41,18 +43,20 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; -import 
org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.proto.ApplicationMasterProtocol.ApplicationMasterProtocolService; + +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed.NodeQueueLoadMonitor; @@ -65,17 +69,21 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager; + +import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; +import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext; +import org.apache.hadoop.yarn.server.utils.YarnServerSecurityUtils; + import java.io.IOException; import java.net.InetSocketAddress; -import java.util.HashSet; +import java.util.ArrayList; import java.util.List; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; /** * The OpportunisticContainerAllocatorAMService is started instead of the - * ApplicationMasterService if distributed scheduling is enabled for the YARN - * cluster. + * ApplicationMasterService if opportunistic scheduling is enabled for the YARN + * cluster (either centralized or distributed opportunistic scheduling). + * * It extends the functionality of the ApplicationMasterService by servicing * clients (AMs and AMRMProxy request interceptors) that understand the * DistributedSchedulingProtocol. @@ -88,37 +96,42 @@ LogFactory.getLog(OpportunisticContainerAllocatorAMService.class); private final NodeQueueLoadMonitor nodeMonitor; + private final OpportunisticContainerAllocator oppContainerAllocator; - private final ConcurrentHashMap> rackToNode = - new ConcurrentHashMap<>(); - private final ConcurrentHashMap> hostToNode = - new ConcurrentHashMap<>(); private final int k; + private final long cacheRefreshInterval; + private volatile List cachedNodes; + private volatile long lastCacheUpdateTime; + public OpportunisticContainerAllocatorAMService(RMContext rmContext, YarnScheduler scheduler) { super(OpportunisticContainerAllocatorAMService.class.getName(), rmContext, scheduler); + this.oppContainerAllocator = new OpportunisticContainerAllocator( + rmContext.getContainerTokenSecretManager()); this.k = rmContext.getYarnConfiguration().getInt( YarnConfiguration.OPP_CONTAINER_ALLOCATION_NODES_NUMBER_USED, - YarnConfiguration.OPP_CONTAINER_ALLOCATION_NODES_NUMBER_USED_DEFAULT); + YarnConfiguration.DEFAULT_OPP_CONTAINER_ALLOCATION_NODES_NUMBER_USED); long nodeSortInterval = rmContext.getYarnConfiguration().getLong( YarnConfiguration.NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS, YarnConfiguration. 
- NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS_DEFAULT); + DEFAULT_NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS); + this.cacheRefreshInterval = nodeSortInterval; + this.lastCacheUpdateTime = System.currentTimeMillis(); NodeQueueLoadMonitor.LoadComparator comparator = NodeQueueLoadMonitor.LoadComparator.valueOf( rmContext.getYarnConfiguration().get( YarnConfiguration.NM_CONTAINER_QUEUING_LOAD_COMPARATOR, YarnConfiguration. - NM_CONTAINER_QUEUING_LOAD_COMPARATOR_DEFAULT)); + DEFAULT_NM_CONTAINER_QUEUING_LOAD_COMPARATOR)); NodeQueueLoadMonitor topKSelector = new NodeQueueLoadMonitor(nodeSortInterval, comparator); float sigma = rmContext.getYarnConfiguration() .getFloat(YarnConfiguration.NM_CONTAINER_QUEUING_LIMIT_STDEV, - YarnConfiguration.NM_CONTAINER_QUEUING_LIMIT_STDEV_DEFAULT); + YarnConfiguration.DEFAULT_NM_CONTAINER_QUEUING_LIMIT_STDEV); int limitMin, limitMax; @@ -126,22 +139,22 @@ public OpportunisticContainerAllocatorAMService(RMContext rmContext, limitMin = rmContext.getYarnConfiguration() .getInt(YarnConfiguration.NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH, YarnConfiguration. - NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH_DEFAULT); + DEFAULT_NM_CONTAINER_QUEUING_MIN_QUEUE_LENGTH); limitMax = rmContext.getYarnConfiguration() .getInt(YarnConfiguration.NM_CONTAINER_QUEUING_MAX_QUEUE_LENGTH, YarnConfiguration. - NM_CONTAINER_QUEUING_MAX_QUEUE_LENGTH_DEFAULT); + DEFAULT_NM_CONTAINER_QUEUING_MAX_QUEUE_LENGTH); } else { limitMin = rmContext.getYarnConfiguration() .getInt( YarnConfiguration.NM_CONTAINER_QUEUING_MIN_QUEUE_WAIT_TIME_MS, YarnConfiguration. - NM_CONTAINER_QUEUING_MIN_QUEUE_WAIT_TIME_MS_DEFAULT); + DEFAULT_NM_CONTAINER_QUEUING_MIN_QUEUE_WAIT_TIME_MS); limitMax = rmContext.getYarnConfiguration() .getInt( YarnConfiguration.NM_CONTAINER_QUEUING_MAX_QUEUE_WAIT_TIME_MS, YarnConfiguration. 
- NM_CONTAINER_QUEUING_MAX_QUEUE_WAIT_TIME_MS_DEFAULT); + DEFAULT_NM_CONTAINER_QUEUING_MAX_QUEUE_WAIT_TIME_MS); } topKSelector.initThresholdCalculator(sigma, limitMin, limitMax); @@ -172,6 +185,28 @@ public Server getServer(YarnRPC rpc, Configuration serverConf, public RegisterApplicationMasterResponse registerApplicationMaster (RegisterApplicationMasterRequest request) throws YarnException, IOException { + final ApplicationAttemptId appAttemptId = getAppAttemptId(); + final SchedulerApplicationAttempt appAttempt = ((AbstractYarnScheduler) + rmContext.getScheduler()).getApplicationAttempt(appAttemptId); + if (appAttempt.getOpportunisticContainerContext() == null) { + OpportunisticContainerContext opCtx = new OpportunisticContainerContext(); + opCtx.setContainerIdGenerator(new OpportunisticContainerAllocator + .ContainerIdGenerator() { + @Override + public long generateContainerId() { + return appAttempt.getAppSchedulingInfo().getNewContainerId(); + } + }); + int tokenExpiryInterval = getConfig() + .getInt(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS); + opCtx.updateAllocationParams( + rmContext.getScheduler().getMinimumResourceCapability(), + rmContext.getScheduler().getMaximumResourceCapability(), + rmContext.getScheduler().getMinimumResourceCapability(), + tokenExpiryInterval); + appAttempt.setOpportunisticContainerContext(opCtx); + } return super.registerApplicationMaster(request); } @@ -183,14 +218,45 @@ public Server getServer(YarnRPC rpc, Configuration serverConf, } @Override - public AllocateResponse allocate(AllocateRequest request) throws - YarnException, IOException { - return super.allocate(request); + protected void allocateInternal(ApplicationAttemptId appAttemptId, + AllocateRequest request, AllocateResponse allocateResponse) + throws YarnException { + + // Partition requests to GUARANTEED and OPPORTUNISTIC. + OpportunisticContainerAllocator.PartitionedResourceRequests + partitionedAsks = + oppContainerAllocator.partitionAskList(request.getAskList()); + + // Allocate OPPORTUNISTIC containers. + SchedulerApplicationAttempt appAttempt = + ((AbstractYarnScheduler)rmContext.getScheduler()) + .getApplicationAttempt(appAttemptId); + + OpportunisticContainerContext oppCtx = + appAttempt.getOpportunisticContainerContext(); + oppCtx.updateNodeList(getLeastLoadedNodes()); + + List oppContainers = + oppContainerAllocator.allocateContainers( + request.getResourceBlacklistRequest(), + partitionedAsks.getOpportunistic(), appAttemptId, oppCtx, + ResourceManager.getClusterTimeStamp(), appAttempt.getUser()); + + // Create RMContainers and update the NMTokens. + if (!oppContainers.isEmpty()) { + handleNewContainers(oppContainers, false); + appAttempt.updateNMTokens(oppContainers); + addToAllocatedContainers(allocateResponse, oppContainers); + } + + // Allocate GUARANTEED containers. 
+ request.setAskList(partitionedAsks.getGuaranteed()); + super.allocateInternal(appAttemptId, request, allocateResponse); } @Override public RegisterDistributedSchedulingAMResponse - registerApplicationMasterForDistributedScheduling( + registerApplicationMasterForDistributedScheduling( RegisterApplicationMasterRequest request) throws YarnException, IOException { RegisterApplicationMasterResponse response = @@ -199,49 +265,18 @@ public AllocateResponse allocate(AllocateRequest request) throws .newRecordInstance(RegisterDistributedSchedulingAMResponse.class); dsResp.setRegisterResponse(response); dsResp.setMinContainerResource( - Resource.newInstance( - getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_MIN_MEMORY_MB, - YarnConfiguration. - OPPORTUNISTIC_CONTAINERS_MIN_MEMORY_MB_DEFAULT), - getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_MIN_VCORES, - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_MIN_VCORES_DEFAULT) - ) - ); + rmContext.getScheduler().getMinimumResourceCapability()); dsResp.setMaxContainerResource( - Resource.newInstance( - getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_MAX_MEMORY_MB, - YarnConfiguration - .OPPORTUNISTIC_CONTAINERS_MAX_MEMORY_MB_DEFAULT), - getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_MAX_VCORES, - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_MAX_VCORES_DEFAULT) - ) - ); - dsResp.setIncrContainerResource( - Resource.newInstance( - getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_INCR_MEMORY_MB, - YarnConfiguration. - OPPORTUNISTIC_CONTAINERS_INCR_MEMORY_MB_DEFAULT), - getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_INCR_VCORES, - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_INCR_VCORES_DEFAULT) - ) - ); + rmContext.getScheduler().getMaximumResourceCapability()); dsResp.setContainerTokenExpiryInterval( getConfig().getInt( - YarnConfiguration.OPPORTUNISTIC_CONTAINERS_TOKEN_EXPIRY_MS, - YarnConfiguration. 
- OPPORTUNISTIC_CONTAINERS_TOKEN_EXPIRY_MS_DEFAULT)); + YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, + YarnConfiguration.DEFAULT_RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS)); dsResp.setContainerIdStart( this.rmContext.getEpoch() << ResourceManager.EPOCH_BIT_SHIFT); // Set nodes to be used for scheduling - dsResp.setNodesForScheduling( - this.nodeMonitor.selectLeastLoadedNodes(this.k)); + dsResp.setNodesForScheduling(getLeastLoadedNodes()); return dsResp; } @@ -250,46 +285,31 @@ public DistributedSchedulingAllocateResponse allocateForDistributedScheduling( DistributedSchedulingAllocateRequest request) throws YarnException, IOException { List distAllocContainers = request.getAllocatedContainers(); - for (Container container : distAllocContainers) { + handleNewContainers(distAllocContainers, true); + AllocateResponse response = allocate(request.getAllocateRequest()); + DistributedSchedulingAllocateResponse dsResp = recordFactory + .newRecordInstance(DistributedSchedulingAllocateResponse.class); + dsResp.setAllocateResponse(response); + dsResp.setNodesForScheduling(getLeastLoadedNodes()); + return dsResp; + } + + private void handleNewContainers(List allocContainers, + boolean isRemotelyAllocated) { + for (Container container : allocContainers) { // Create RMContainer SchedulerApplicationAttempt appAttempt = ((AbstractYarnScheduler) rmContext.getScheduler()) .getCurrentAttemptForContainer(container.getId()); RMContainer rmContainer = new RMContainerImpl(container, appAttempt.getApplicationAttemptId(), container.getNodeId(), - appAttempt.getUser(), rmContext, true); + appAttempt.getUser(), rmContext, isRemotelyAllocated); appAttempt.addRMContainer(container.getId(), rmContainer); + ((AbstractYarnScheduler) rmContext.getScheduler()).getNode( + container.getNodeId()).allocateContainer(rmContainer); rmContainer.handle( new RMContainerEvent(container.getId(), - RMContainerEventType.LAUNCHED)); - } - AllocateResponse response = allocate(request.getAllocateRequest()); - DistributedSchedulingAllocateResponse dsResp = recordFactory - .newRecordInstance(DistributedSchedulingAllocateResponse.class); - dsResp.setAllocateResponse(response); - dsResp.setNodesForScheduling( - this.nodeMonitor.selectLeastLoadedNodes(this.k)); - return dsResp; - } - - private void addToMapping(ConcurrentHashMap> mapping, - String rackName, NodeId nodeId) { - if (rackName != null) { - mapping.putIfAbsent(rackName, new HashSet()); - Set nodeIds = mapping.get(rackName); - synchronized (nodeIds) { - nodeIds.add(nodeId); - } - } - } - - private void removeFromMapping(ConcurrentHashMap> mapping, - String rackName, NodeId nodeId) { - if (rackName != null) { - Set nodeIds = mapping.get(rackName); - synchronized (nodeIds) { - nodeIds.remove(nodeId); - } + RMContainerEventType.ACQUIRED)); } } @@ -303,10 +323,6 @@ public void handle(SchedulerEvent event) { NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent) event; nodeMonitor.addNode(nodeAddedEvent.getContainerReports(), nodeAddedEvent.getAddedRMNode()); - addToMapping(rackToNode, nodeAddedEvent.getAddedRMNode().getRackName(), - nodeAddedEvent.getAddedRMNode().getNodeID()); - addToMapping(hostToNode, nodeAddedEvent.getAddedRMNode().getHostName(), - nodeAddedEvent.getAddedRMNode().getNodeID()); break; case NODE_REMOVED: if (!(event instanceof NodeRemovedSchedulerEvent)) { @@ -315,12 +331,6 @@ public void handle(SchedulerEvent event) { NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent) event; 
nodeMonitor.removeNode(nodeRemovedEvent.getRemovedRMNode()); - removeFromMapping(rackToNode, - nodeRemovedEvent.getRemovedRMNode().getRackName(), - nodeRemovedEvent.getRemovedRMNode().getNodeID()); - removeFromMapping(hostToNode, - nodeRemovedEvent.getRemovedRMNode().getHostName(), - nodeRemovedEvent.getRemovedRMNode().getNodeID()); break; case NODE_UPDATE: if (!(event instanceof NodeUpdateSchedulerEvent)) { @@ -364,4 +374,43 @@ public void handle(SchedulerEvent event) { public QueueLimitCalculator getNodeManagerQueueLimitCalculator() { return nodeMonitor.getThresholdCalculator(); } + + private synchronized List<RemoteNode> getLeastLoadedNodes() { + long currTime = System.currentTimeMillis(); + if ((currTime - lastCacheUpdateTime > cacheRefreshInterval) + || (cachedNodes == null)) { + cachedNodes = convertToRemoteNodes( + this.nodeMonitor.selectLeastLoadedNodes(this.k)); + if (cachedNodes.size() > 0) { + lastCacheUpdateTime = currTime; + } + } + return cachedNodes; + } + + private List<RemoteNode> convertToRemoteNodes(List<NodeId> nodeIds) { + ArrayList<RemoteNode> retNodes = new ArrayList<>(); + for (NodeId nId : nodeIds) { + RemoteNode remoteNode = convertToRemoteNode(nId); + if (null != remoteNode) { + retNodes.add(remoteNode); + } + } + return retNodes; + } + + private RemoteNode convertToRemoteNode(NodeId nodeId) { + SchedulerNode node = + ((AbstractYarnScheduler) rmContext.getScheduler()).getNode(nodeId); + return node != null ? RemoteNode.newInstance(nodeId, node.getHttpAddress()) + : null; + } + + private static ApplicationAttemptId getAppAttemptId() throws YarnException { + AMRMTokenIdentifier amrmTokenIdentifier = + YarnServerSecurityUtils.authorizeRequest(); + ApplicationAttemptId applicationAttemptId = + amrmTokenIdentifier.getApplicationAttemptId(); + return applicationAttemptId; + } }
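getLeastLoadedNodes() above is deliberately cheap on the heartbeat path: the top-k list computed by the NodeQueueLoadMonitor is cached and recomputed at most once per node-sort interval, and lastCacheUpdateTime only advances when a non-empty snapshot comes back, so an empty cluster view is retried on the very next call. The same shape in isolation (class and supplier names are illustrative, not from the patch):

import java.util.List;
import java.util.function.Supplier;

class TimedListCache<T> {
  private final long refreshIntervalMs;
  private final Supplier<List<T>> loader;
  private volatile List<T> cached;
  private volatile long lastUpdate;

  TimedListCache(long refreshIntervalMs, Supplier<List<T>> loader) {
    this.refreshIntervalMs = refreshIntervalMs;
    this.loader = loader;
  }

  synchronized List<T> get() {
    long now = System.currentTimeMillis();
    if (cached == null || now - lastUpdate > refreshIntervalMs) {
      cached = loader.get();
      // As in the patch: only advance the clock on a useful snapshot,
      // so an empty result is refreshed again on the next call.
      if (!cached.isEmpty()) {
        lastUpdate = now;
      }
    }
    return cached;
  }
}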
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index d60dcdc..76b3977 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -1160,6 +1160,13 @@ protected ApplicationMasterService createApplicationMasterService() { Configuration config = this.rmContext.getYarnConfiguration(); if (YarnConfiguration.isOpportunisticContainerAllocationEnabled(config) || YarnConfiguration.isDistSchedulingEnabled(config)) { + if (YarnConfiguration.isDistSchedulingEnabled(config) && + !YarnConfiguration + .isOpportunisticContainerAllocationEnabled(config)) { + throw new YarnRuntimeException( + "Invalid parameters: opportunistic container allocation has to " + + "be enabled when distributed scheduling is enabled."); + } OpportunisticContainerAllocatorAMService oppContainerAllocatingAMService = new OpportunisticContainerAllocatorAMService(this.rmContext, @@ -1169,9 +1176,8 @@ protected ApplicationMasterService createApplicationMasterService() { OpportunisticContainerAllocatorAMService.class.getName()); // Add an event dispatcher for the // OpportunisticContainerAllocatorAMService to handle node - // updates/additions and removals. - // Since the SchedulerEvent is currently a super set of theses, - // we register interest for it.. + // additions, updates and removals. Since the SchedulerEvent is currently + // a superset of these, we register interest in it. addService(oppContainerAllocEventDispatcher); rmDispatcher.register(SchedulerEventType.class, oppContainerAllocEventDispatcher); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java index a244ad8..020764b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java @@ -25,14 +25,13 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.ContainerState; -import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index 10081b4..0afd765 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -53,7 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode .RMNodeDecreaseContainerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.state.InvalidStateTransitionException; import org.apache.hadoop.yarn.state.MultipleArcTransition; import org.apache.hadoop.yarn.state.SingleArcTransition; @@ -80,8 +80,8 @@ RMContainerEventType.KILL) .addTransition(RMContainerState.NEW, RMContainerState.RESERVED, RMContainerEventType.RESERVED, new ContainerReservedTransition()) - .addTransition(RMContainerState.NEW, RMContainerState.RUNNING, - RMContainerEventType.LAUNCHED) + .addTransition(RMContainerState.NEW, RMContainerState.ACQUIRED, + RMContainerEventType.ACQUIRED, new 
AcquiredTransition()) .addTransition(RMContainerState.NEW, EnumSet.of(RMContainerState.RUNNING, RMContainerState.COMPLETED), RMContainerEventType.RECOVER, new ContainerRecoveredTransition()) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerReservedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerReservedEvent.java index d7d1e94..80e7c0b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerReservedEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerReservedEvent.java @@ -21,7 +21,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; /** * The event signifying that a container has been reserved. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java index f2ac0f9..7d34389 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java @@ -31,7 +31,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; /** * Node managers information on available resources @@ -176,7 +176,7 @@ public void updateNodeHeartbeatResponseForContainersDecreasing( public List pullNewlyIncreasedContainers(); - QueuedContainersStatus getQueuedContainersStatus(); + OpportunisticContainersStatus getOpportunisticContainersStatus(); long getUntrackedTimeStamp(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 8a04a48..0de4d48 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -61,7 
+61,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics; import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEvent; @@ -137,7 +137,7 @@ private volatile Resource physicalResource; /* Container Queue Information for the node.. Used by Distributed Scheduler */ - private QueuedContainersStatus queuedContainersStatus; + private OpportunisticContainersStatus opportunisticContainersStatus; private final ContainerAllocationExpirer containerAllocationExpirer; /* set of containers that have just launched */ @@ -1190,7 +1190,8 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event; - rmNode.setQueuedContainersStatus(statusEvent.getContainerQueueInfo()); + rmNode.setOpportunisticContainersStatus( + statusEvent.getOpportunisticContainersStatus()); NodeHealthStatus remoteNodeHealthStatus = updateRMNodeFromStatusEvents( rmNode, statusEvent); NodeState initialState = rmNode.getState(); @@ -1393,32 +1394,26 @@ private void handleContainerStatus(List containerStatuses) { } // Process running containers - if (remoteContainer.getState() == ContainerState.RUNNING) { - // Process only GUARANTEED containers in the RM. - if (remoteContainer.getExecutionType() == ExecutionType.GUARANTEED) { - ++numRemoteRunningContainers; - if (!launchedContainers.contains(containerId)) { - // Just launched container. RM knows about it the first time. - launchedContainers.add(containerId); - newlyLaunchedContainers.add(remoteContainer); - // Unregister from containerAllocationExpirer. - containerAllocationExpirer - .unregister(new AllocationExpirationInfo(containerId)); - } - } - } else { - if (remoteContainer.getExecutionType() == ExecutionType.GUARANTEED) { - // A finished container - launchedContainers.remove(containerId); + if (remoteContainer.getState() == ContainerState.RUNNING || + remoteContainer.getState() == ContainerState.SCHEDULED) { + ++numRemoteRunningContainers; + if (!launchedContainers.contains(containerId)) { + // Just launched container. RM knows about it the first time. + launchedContainers.add(containerId); + newlyLaunchedContainers.add(remoteContainer); // Unregister from containerAllocationExpirer. containerAllocationExpirer .unregister(new AllocationExpirationInfo(containerId)); } - // Completed containers should also include the OPPORTUNISTIC containers - // so that the AM gets properly notified. + } else { + // A finished container + launchedContainers.remove(containerId); if (completedContainers.add(containerId)) { newlyCompletedContainers.add(remoteContainer); } + // Unregister from containerAllocationExpirer. 
+ containerAllocationExpirer + .unregister(new AllocationExpirationInfo(containerId)); } } @@ -1511,23 +1506,22 @@ public void setUntrackedTimeStamp(long ts) { this.timeStamp = ts; } - @Override - public QueuedContainersStatus getQueuedContainersStatus() { + public OpportunisticContainersStatus getOpportunisticContainersStatus() { this.readLock.lock(); try { - return this.queuedContainersStatus; + return this.opportunisticContainersStatus; } finally { this.readLock.unlock(); } } - public void setQueuedContainersStatus(QueuedContainersStatus - queuedContainersStatus) { + public void setOpportunisticContainersStatus( + OpportunisticContainersStatus opportunisticContainersStatus) { this.writeLock.lock(); try { - this.queuedContainersStatus = queuedContainersStatus; + this.opportunisticContainersStatus = opportunisticContainersStatus; } finally { this.writeLock.unlock(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java index 5eeaabe..f9fe159 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java @@ -28,7 +28,7 @@ import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.server.api.records.NodeStatus; @@ -80,8 +80,8 @@ public ResourceUtilization getNodeUtilization() { return this.logAggregationReportsForApps; } - public QueuedContainersStatus getContainerQueueInfo() { - return this.nodeStatus.getQueuedContainersStatus(); + public OpportunisticContainersStatus getOpportunisticContainersStatus() { + return this.nodeStatus.getOpportunisticContainersStatus(); } public void setLogAggregationReportsForApps( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 65bbf4b..c1a985d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; 
+import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; @@ -580,7 +581,7 @@ public void completedContainer(RMContainer rmContainer, return; } - if (!rmContainer.isRemotelyAllocated()) { + if (rmContainer.getExecutionType() == ExecutionType.GUARANTEED) { completedContainerInternal(rmContainer, containerStatus, event); } else { ContainerId containerId = rmContainer.getContainerId(); @@ -596,6 +597,8 @@ public void completedContainer(RMContainer rmContainer, LOG.debug("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event); } + getSchedulerNode(rmContainer.getNodeId()).releaseContainer( + rmContainer.getContainer()); } // If the container is getting killed in ACQUIRED state, the requester (AM diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 80811b1..62bd766 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -54,6 +54,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.ResourceRequestUpdateResult; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SchedulingPlacementSet; + +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.util.resource.Resources; /** @@ -948,6 +950,7 @@ public void recoverContainer(RMContainer rmContainer) { public ResourceRequest cloneResourceRequest(ResourceRequest request) { ResourceRequest newRequest = ResourceRequest.newBuilder() .priority(request.getPriority()) + .allocationRequestId(request.getAllocationRequestId()) .resourceName(request.getResourceName()) .capability(request.getCapability()) .numContainers(1) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index c480f4c..6a5b090 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -47,10 +47,10 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import 
org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -72,10 +72,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerUpdatesAcquiredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SchedulingPlacementSet; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.SchedulableEntity; + +import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.state.InvalidStateTransitionException; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -122,6 +123,9 @@ private boolean isAttemptRecovering; protected ResourceUsage attemptResourceUsage = new ResourceUsage(); + /** Resource usage of opportunistic containers. */ + protected ResourceUsage attemptOpportunisticResourceUsage = + new ResourceUsage(); /** Scheduled by a remote scheduler. */ protected ResourceUsage attemptResourceUsageAllocatedRemotely = new ResourceUsage(); @@ -140,6 +144,8 @@ // by NM should not be recovered. private Set pendingRelease = null; + private OpportunisticContainerContext oppContainerContext; + /** * Count how many times the application has been given an opportunity to * schedule a task at each priority. Each time the scheduler asks the @@ -213,7 +219,17 @@ public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId, readLock = lock.readLock(); writeLock = lock.writeLock(); } - + + public void setOpportunisticContainerContext( + OpportunisticContainerContext oppContext) { + this.oppContainerContext = oppContext; + } + + public OpportunisticContainerContext + getOpportunisticContainerContext() { + return this.oppContainerContext; + } + /** * Get the live containers of the application. 
* @return live containers of the application @@ -345,6 +361,10 @@ public void addRMContainer( try { writeLock.lock(); liveContainers.put(id, rmContainer); + if (rmContainer.getExecutionType() == ExecutionType.OPPORTUNISTIC) { + this.attemptOpportunisticResourceUsage.incUsed( + rmContainer.getAllocatedResource()); + } if (rmContainer.isRemotelyAllocated()) { this.attemptResourceUsageAllocatedRemotely.incUsed( rmContainer.getAllocatedResource()); @@ -358,9 +378,15 @@ public void removeRMContainer(ContainerId containerId) { try { writeLock.lock(); RMContainer rmContainer = liveContainers.remove(containerId); - if (rmContainer != null && rmContainer.isRemotelyAllocated()) { - this.attemptResourceUsageAllocatedRemotely.decUsed( - rmContainer.getAllocatedResource()); + if (rmContainer != null) { + if (rmContainer.getExecutionType() == ExecutionType.OPPORTUNISTIC) { + this.attemptOpportunisticResourceUsage + .decUsed(rmContainer.getAllocatedResource()); + } + if (rmContainer.isRemotelyAllocated()) { + this.attemptResourceUsageAllocatedRemotely + .decUsed(rmContainer.getAllocatedResource()); + } } } finally { writeLock.unlock(); @@ -628,12 +654,7 @@ private Container updateContainerAndNMToken(RMContainer rmContainer, container.getPriority(), rmContainer.getCreationTime(), this.logAggregationContext, rmContainer.getNodeLabelExpression(), containerType)); - NMToken nmToken = - rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), - getApplicationAttemptId(), container); - if (nmToken != null) { - updatedNMTokens.add(nmToken); - } + updateNMToken(container); } catch (IllegalArgumentException e) { // DNS might be down, skip returning this container. LOG.error("Error trying to assign container token and NM token to" @@ -651,6 +672,21 @@ private Container updateContainerAndNMToken(RMContainer rmContainer, return container; } + public void updateNMTokens(Collection containers) { + for (Container container : containers) { + updateNMToken(container); + } + } + + private void updateNMToken(Container container) { + NMToken nmToken = + rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), + getApplicationAttemptId(), container); + if (nmToken != null) { + updatedNMTokens.add(nmToken); + } + } + // Create container token and update NMToken altogether, if either of them fails for // some reason like DNS unavailable, do not return this container and keep it // in the newlyAllocatedContainers waiting to be refetched. 
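Two bookkeeping changes land in SchedulerApplicationAttempt above: OPPORTUNISTIC containers are charged to a separate attemptOpportunisticResourceUsage ledger on add and credited back on remove, and NM-token creation is factored out into updateNMToken()/updateNMTokens() so that containers created outside updateContainerAndNMToken, such as the opportunistic ones handed back by the AM service, still receive tokens. A compact model of the side-ledger rule, using YARN's Resources helper; the class itself is illustrative, not patch code:

import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;

class OpportunisticLedgerModel {
  private Resource used = Resource.newInstance(0, 0);

  // Mirrors addRMContainer(): only OPPORTUNISTIC containers touch this
  // ledger; guaranteed usage flows through the schedulers' normal path.
  void onAdd(ExecutionType type, Resource alloc) {
    if (type == ExecutionType.OPPORTUNISTIC) {
      used = Resources.add(used, alloc);
    }
  }

  // Mirrors removeRMContainer(): the same containers must be credited
  // back, so a completed container leaves the ledger where it started.
  void onRemove(ExecutionType type, Resource alloc) {
    if (type == ExecutionType.OPPORTUNISTIC) {
      used = Resources.subtract(used, alloc);
    }
  }
}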
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index 2efdbd0..759db05 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -30,6 +30,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceUtilization; @@ -39,6 +40,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.collect.ImmutableSet; @@ -148,8 +150,10 @@ public String getRackName() { */ public synchronized void allocateContainer(RMContainer rmContainer) { Container container = rmContainer.getContainer(); - deductUnallocatedResource(container.getResource()); - ++numContainers; + if (rmContainer.getExecutionType() == ExecutionType.GUARANTEED) { + deductUnallocatedResource(container.getResource()); + ++numContainers; + } launchedContainers.put(container.getId(), rmContainer); @@ -246,8 +250,10 @@ public synchronized boolean isValidContainer(ContainerId containerId) { */ protected synchronized void updateResourceForReleasedContainer( Container container) { - addUnallocatedResource(container.getResource()); - --numContainers; + if (container.getExecutionType() == ExecutionType.GUARANTEED) { + addUnallocatedResource(container.getResource()); + --numContainers; + } } /**
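The SchedulerNode change above applies the same execution-type split at the node level: an OPPORTUNISTIC container is still recorded in launchedContainers, but it neither deducts unallocated resources on allocation nor adds them back on release, because such containers run out of the NM's queued/spare capacity rather than the guaranteed headroom the RM schedules against. A minimal single-resource model of that rule (hypothetical class, memory only):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ExecutionType;

class NodeLedgerModel {
  private long unallocatedMb = 8192; // illustrative node capacity
  private final Map<ContainerId, ExecutionType> launched = new HashMap<>();

  void allocate(ContainerId id, ExecutionType type, long mb) {
    if (type == ExecutionType.GUARANTEED) {
      unallocatedMb -= mb; // only guaranteed containers consume headroom
    }
    launched.put(id, type); // both kinds are tracked on the node
  }

  void release(ContainerId id, long mb) {
    if (launched.remove(id) == ExecutionType.GUARANTEED) {
      unallocatedMb += mb;
    }
  }

  long getUnallocatedMb() {
    return unallocatedMb;
  }
}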
org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; @@ -2216,7 +2217,8 @@ public void collectSchedulerApplications( @Override public void attachContainer(Resource clusterResource, FiCaSchedulerApp application, RMContainer rmContainer) { - if (application != null) { + if (application != null && rmContainer != null + && rmContainer.getExecutionType() == ExecutionType.GUARANTEED) { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); allocateResource(clusterResource, application, rmContainer.getContainer() @@ -2234,7 +2236,8 @@ public void attachContainer(Resource clusterResource, @Override public void detachContainer(Resource clusterResource, FiCaSchedulerApp application, RMContainer rmContainer) { - if (application != null) { + if (application != null && rmContainer != null + && rmContainer.getExecutionType() == ExecutionType.GUARANTEED) { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); releaseResource(clusterResource, application, rmContainer.getContainer() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java index 74a64c1..0dc527f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java @@ -31,7 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index f5026ed..1eb48bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -38,7 +38,7 
@@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/SchedulerContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/SchedulerContainer.java index 8b4907b..159fb09 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/SchedulerContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/SchedulerContainer.java @@ -22,7 +22,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; /** * Contexts for a container inside scheduler diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index f076e4f..1022be7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -52,7 +52,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivitiesManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAMContainerLaunchDiagnosticsConstants; @@ -69,6 +68,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.SchedulerContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SchedulingPlacementSet; + +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java index d79fcaf..344daf2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java @@ -29,7 +29,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.util.resource.Resources; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/NodeQueueLoadMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/NodeQueueLoadMonitor.java index 017a256..fb67270 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/NodeQueueLoadMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/NodeQueueLoadMonitor.java @@ -24,7 +24,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.ClusterMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -37,6 +37,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantReadWriteLock; /** * The NodeQueueLoadMonitor keeps track of load metrics (such as queue length @@ -59,9 +60,9 @@ @Override public int compare(ClusterNode o1, ClusterNode o2) { if (getMetric(o1) == getMetric(o2)) { - return o1.timestamp < o2.timestamp ? +1 : -1; + return (int)(o2.timestamp - o1.timestamp); } - return getMetric(o1) > getMetric(o2) ? 
+1 : -1; + return getMetric(o1) - getMetric(o2); } public int getMetric(ClusterNode c) { @@ -103,16 +104,28 @@ public ClusterNode updateTimestamp() { new ConcurrentHashMap<>(); private final LoadComparator comparator; private QueueLimitCalculator thresholdCalculator; + private ReentrantReadWriteLock sortedNodesLock = new ReentrantReadWriteLock(); + private ReentrantReadWriteLock clusterNodesLock = + new ReentrantReadWriteLock(); Runnable computeTask = new Runnable() { @Override public void run() { - synchronized (sortedNodes) { - sortedNodes.clear(); - sortedNodes.addAll(sortNodes()); + ReentrantReadWriteLock.WriteLock writeLock = sortedNodesLock.writeLock(); + writeLock.lock(); + try { + try { + List nodeIds = sortNodes(); + sortedNodes.clear(); + sortedNodes.addAll(nodeIds); + } catch (Exception ex) { + LOG.warn("Got Exception while sorting nodes..", ex); + } if (thresholdCalculator != null) { thresholdCalculator.update(); } + } finally { + writeLock.unlock(); } } }; @@ -156,9 +169,11 @@ public void initThresholdCalculator(float sigma, int limitMin, int limitMax) { } @Override - public void addNode(List containerStatuses, RMNode - rmNode) { - LOG.debug("Node added event from: " + rmNode.getNode().getName()); + public void addNode(List containerStatuses, + RMNode rmNode) { + if (LOG.isDebugEnabled()) { + LOG.debug("Node added event from: " + rmNode.getNode().getName()); + } // Ignoring this currently : at least one NODE_UPDATE heartbeat is // required to ensure node eligibility. } @@ -166,9 +181,16 @@ public void addNode(List containerStatuses, RMNode @Override public void removeNode(RMNode removedRMNode) { LOG.debug("Node delete event for: " + removedRMNode.getNode().getName()); - synchronized (this.clusterNodes) { - if (this.clusterNodes.containsKey(removedRMNode.getNodeID())) { - this.clusterNodes.remove(removedRMNode.getNodeID()); + ReentrantReadWriteLock.WriteLock writeLock = clusterNodesLock.writeLock(); + writeLock.lock(); + ClusterNode node; + try { + node = this.clusterNodes.remove(removedRMNode.getNodeID()); + } finally { + writeLock.unlock(); + } + if (LOG.isDebugEnabled()) { + if (node != null) { LOG.debug("Delete ClusterNode: " + removedRMNode.getNodeID()); } else { LOG.debug("Node not in list!"); @@ -179,14 +201,16 @@ public void removeNode(RMNode removedRMNode) { @Override public void updateNode(RMNode rmNode) { LOG.debug("Node update event from: " + rmNode.getNodeID()); - QueuedContainersStatus queuedContainersStatus = - rmNode.getQueuedContainersStatus(); + OpportunisticContainersStatus opportunisticContainersStatus = + rmNode.getOpportunisticContainersStatus(); int estimatedQueueWaitTime = - queuedContainersStatus.getEstimatedQueueWaitTime(); - int waitQueueLength = queuedContainersStatus.getWaitQueueLength(); + opportunisticContainersStatus.getEstimatedQueueWaitTime(); + int waitQueueLength = opportunisticContainersStatus.getWaitQueueLength(); // Add nodes to clusterNodes. If estimatedQueueTime is -1, ignore node // UNLESS comparator is based on queue length. 
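The NodeQueueLoadMonitor hunks above and below replace coarse synchronized blocks with two ReentrantReadWriteLocks, one guarding the sorted snapshot and one the cluster-node map, and the periodic computeTask now catches exceptions from sortNodes. That catch matters: an uncaught exception silently cancels all future runs of a scheduleAtFixedRate task. One caveat worth flagging is that the new subtraction-based comparator, (int)(o2.timestamp - o1.timestamp) and getMetric(o1) - getMetric(o2), can overflow for extreme inputs; Integer.compare and Long.compare are the overflow-safe idiom. A minimal sketch of the sort-under-write-lock, read-under-read-lock pattern (hypothetical ClusterSnapshot class, not the patch's NodeQueueLoadMonitor):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class ClusterSnapshot {

  static final class Node {
    final String id;
    final int queueLength;
    final long timestamp;

    Node(String id, int queueLength, long timestamp) {
      this.id = id;
      this.queueLength = queueLength;
      this.timestamp = timestamp;
    }
  }

  // Overflow-safe ordering: compare(), not subtraction. Ties broken by most
  // recently updated node first.
  private static final Comparator<Node> BY_LOAD =
      Comparator.comparingInt((Node n) -> n.queueLength)
          .thenComparing((a, b) -> Long.compare(b.timestamp, a.timestamp));

  private final ConcurrentMap<String, Node> clusterNodes =
      new ConcurrentHashMap<>();
  private final List<String> sortedNodes = new ArrayList<>();
  private final ReentrantReadWriteLock sortedLock =
      new ReentrantReadWriteLock();

  // NM heartbeat path: publish or refresh a node's latest load report.
  public void update(Node n) {
    clusterNodes.put(n.id, n);
  }

  // Periodic task: sort a copy, then swap it in under the write lock.
  public void recomputeSortedNodes() {
    List<Node> copy = new ArrayList<>(clusterNodes.values());
    copy.sort(BY_LOAD);
    sortedLock.writeLock().lock();
    try {
      sortedNodes.clear();
      for (Node n : copy) {
        sortedNodes.add(n.id);
      }
    } finally {
      sortedLock.writeLock().unlock();
    }
  }

  // Readers copy the least-loaded k ids out under the read lock.
  public List<String> selectLeastLoaded(int k) {
    sortedLock.readLock().lock();
    try {
      int end = (k >= 0 && k < sortedNodes.size()) ? k : sortedNodes.size();
      return new ArrayList<>(sortedNodes.subList(0, end));
    } finally {
      sortedLock.readLock().unlock();
    }
  }
}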
- synchronized (this.clusterNodes) { + ReentrantReadWriteLock.WriteLock writeLock = clusterNodesLock.writeLock(); + writeLock.lock(); + try { ClusterNode currentNode = this.clusterNodes.get(rmNode.getNodeID()); if (currentNode == null) { if (estimatedQueueWaitTime != -1 @@ -222,6 +246,8 @@ public void updateNode(RMNode rmNode) { "wait queue length [" + currentNode.queueLength + "]"); } } + } finally { + writeLock.unlock(); } } @@ -245,15 +271,22 @@ public void updateNodeResource(RMNode rmNode, ResourceOption resourceOption) { * @return ordered list of nodes */ public List selectLeastLoadedNodes(int k) { - synchronized (this.sortedNodes) { - return ((k < this.sortedNodes.size()) && (k >= 0)) ? + ReentrantReadWriteLock.ReadLock readLock = sortedNodesLock.readLock(); + readLock.lock(); + try { + List retVal = ((k < this.sortedNodes.size()) && (k >= 0)) ? new ArrayList<>(this.sortedNodes).subList(0, k) : new ArrayList<>(this.sortedNodes); + return retVal; + } finally { + readLock.unlock(); } } private List sortNodes() { - synchronized (this.clusterNodes) { + ReentrantReadWriteLock.ReadLock readLock = clusterNodesLock.readLock(); + readLock.lock(); + try { ArrayList aList = new ArrayList<>(this.clusterNodes.values()); List retList = new ArrayList<>(); Object[] nodes = aList.toArray(); @@ -267,6 +300,8 @@ public void updateNodeResource(RMNode rmNode, ResourceOption resourceOption) { retList.add(((ClusterNode)nodes[j]).nodeId); } return retList; + } finally { + readLock.unlock(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index 0686bc2..94030e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -56,7 +56,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java index 024ec67..85aab9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java @@ -26,7 +26,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; @Private diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoAppAttempt.java index d275bfd..e60f70e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoAppAttempt.java @@ -33,10 +33,12 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; + +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; + import java.util.List; public class FifoAppAttempt extends FiCaSchedulerApp { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 657ac03..f0c0942 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -69,7 +69,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; @@ -709,7 +709,7 @@ private int assignContainer(FiCaSchedulerNode node, FifoAppAttempt application, // Inform the application RMContainer rmContainer = application.allocate(type, node, schedulerKey, request, container); - + // Inform the node node.allocateContainer(rmContainer); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/SchedulingPlacementSet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/SchedulingPlacementSet.java index f87f764..86843b0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/SchedulingPlacementSet.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/placement/SchedulingPlacementSet.java @@ -21,7 +21,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import java.util.Iterator; import java.util.List; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 7063421..c03df63 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -18,17 +18,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; -import static org.apache.hadoop.yarn.webapp.YarnWebParams.NODE_LABEL; -import static org.apache.hadoop.yarn.webapp.YarnWebParams.NODE_STATE; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit; - -import java.util.Collection; - +import com.google.inject.Inject; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; @@ -42,18 +35,30 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; -import com.google.inject.Inject; +import java.util.Collection; + +import static 
org.apache.hadoop.yarn.webapp.YarnWebParams.NODE_LABEL; +import static org.apache.hadoop.yarn.webapp.YarnWebParams.NODE_STATE; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID; +import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit; class NodesPage extends RmView { static class NodesBlock extends HtmlBlock { final ResourceManager rm; private static final long BYTES_IN_MB = 1024 * 1024; + private static final long BYTES_IN_GB = 1024 * 1024 * 1024; + private static boolean opportunisticContainersEnabled; @Inject NodesBlock(ResourceManager rm, ViewContext ctx) { super(ctx); this.rm = rm; + this.opportunisticContainersEnabled = YarnConfiguration + .isOpportunisticContainerAllocationEnabled( + this.rm.getRMContext().getYarnConfiguration()); } @Override @@ -61,9 +66,10 @@ protected void render(Block html) { html._(MetricsOverviewTable.class); ResourceScheduler sched = rm.getResourceScheduler(); + String type = $(NODE_STATE); String labelFilter = $(NODE_LABEL, CommonNodeLabelsManager.ANY).trim(); - TBODY> tbody = + Hamlet.TR>> trbody = html.table("#nodes").thead().tr() .th(".nodelabels", "Node Labels") .th(".rack", "Rack") @@ -71,13 +77,29 @@ protected void render(Block html) { .th(".nodeaddress", "Node Address") .th(".nodehttpaddress", "Node HTTP Address") .th(".lastHealthUpdate", "Last health-update") - .th(".healthReport", "Health-report") - .th(".containers", "Containers") - .th(".mem", "Mem Used") - .th(".mem", "Mem Avail") - .th(".vcores", "VCores Used") - .th(".vcores", "VCores Avail") - .th(".nodeManagerVersion", "Version")._()._().tbody(); + .th(".healthReport", "Health-report"); + + if (!this.opportunisticContainersEnabled) { + trbody.th(".containers", "Containers") + .th(".mem", "Mem Used") + .th(".mem", "Mem Avail") + .th(".vcores", "VCores Used") + .th(".vcores", "VCores Avail"); + } else { + trbody.th(".containers", "Running Containers (G)") + .th(".mem", "Mem Used (G)") + .th(".mem", "Mem Avail (G)") + .th(".vcores", "VCores Used (G)") + .th(".vcores", "VCores Avail (G)") + .th(".containers", "Running Containers (O)") + .th(".mem", "Mem Used (O)") + .th(".vcores", "VCores Used (O)") + .th(".containers", "Queued Containers"); + } + + TBODY> tbody = + trbody.th(".nodeManagerVersion", "Version")._()._().tbody(); + NodeState stateFilter = null; if (type != null && !type.isEmpty()) { stateFilter = NodeState.valueOf(StringUtils.toUpperCase(type)); @@ -153,7 +175,24 @@ protected void render(Block html) { .append("\",\"").append(String.valueOf(info.getUsedVirtualCores())) .append("\",\"") .append(String.valueOf(info.getAvailableVirtualCores())) - .append("\",\"").append(ni.getNodeManagerVersion()) + .append("\",\""); + + // If opportunistic containers are enabled, add extra fields. + if (this.opportunisticContainersEnabled) { + nodeTableData + .append(String.valueOf(info.getNumRunningOpportContainers())) + .append("\",\"").append("
") + .append(StringUtils.byteDesc( + info.getUsedMemoryOpportGB() * BYTES_IN_GB)) + .append("\",\"") + .append(String.valueOf(info.getUsedVirtualCoresOpport())) + .append("\",\"") + .append(String.valueOf(info.getNumQueuedContainers())) + .append("\",\""); + } + + nodeTableData.append(ni.getNodeManagerVersion()) .append("\"],\n"); } if (nodeTableData.charAt(nodeTableData.length() - 2) == ',') { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java index 4a6aa4b..3416e52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java @@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; @@ -49,6 +50,10 @@ protected long availMemoryMB; protected long usedVirtualCores; protected long availableVirtualCores; + private int numRunningOpportContainers; + private long usedMemoryOpportGB; + private long usedVirtualCoresOpport; + private int numQueuedContainers; protected ArrayList nodeLabels = new ArrayList(); protected ResourceUtilizationInfo resourceUtilization; @@ -66,7 +71,8 @@ public NodeInfo(RMNode ni, ResourceScheduler sched) { this.usedMemoryMB = report.getUsedResource().getMemorySize(); this.availMemoryMB = report.getAvailableResource().getMemorySize(); this.usedVirtualCores = report.getUsedResource().getVirtualCores(); - this.availableVirtualCores = report.getAvailableResource().getVirtualCores(); + this.availableVirtualCores = + report.getAvailableResource().getVirtualCores(); } this.id = id.toString(); this.rack = ni.getRackName(); @@ -76,7 +82,22 @@ public NodeInfo(RMNode ni, ResourceScheduler sched) { this.lastHealthUpdate = ni.getLastHealthReportTime(); this.healthReport = String.valueOf(ni.getHealthReport()); this.version = ni.getNodeManagerVersion(); - + + // Status of opportunistic containers. 
+ this.numRunningOpportContainers = 0; + this.usedMemoryOpportGB = 0; + this.usedVirtualCoresOpport = 0; + this.numQueuedContainers = 0; + OpportunisticContainersStatus opportStatus = + ni.getOpportunisticContainersStatus(); + if (opportStatus != null) { + this.numRunningOpportContainers = + opportStatus.getRunningOpportContainers(); + this.usedMemoryOpportGB = opportStatus.getOpportMemoryUsed(); + this.usedVirtualCoresOpport = opportStatus.getOpportCoresUsed(); + this.numQueuedContainers = opportStatus.getQueuedOpportContainers(); + } + // add labels Set labelSet = ni.getNodeLabels(); if (labelSet != null) { @@ -140,6 +161,22 @@ public long getAvailableVirtualCores() { return this.availableVirtualCores; } + public int getNumRunningOpportContainers() { + return numRunningOpportContainers; + } + + public long getUsedMemoryOpportGB() { + return usedMemoryOpportGB; + } + + public long getUsedVirtualCoresOpport() { + return usedVirtualCoresOpport; + } + + public int getNumQueuedContainers() { + return numQueuedContainers; + } + public ArrayList getNodeLabels() { return this.nodeLabels; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index e70c3e0..3288d39 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -61,7 +61,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index bc2c4c3..f2e9c73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -35,7 +35,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; @@ -263,6 +263,11 @@ public ResourceUtilization getNodeUtilization() { } @Override + public 
OpportunisticContainersStatus getOpportunisticContainersStatus() { + return null; + } + + @Override public long getUntrackedTimeStamp() { return 0; } @@ -273,11 +278,6 @@ public void setUntrackedTimeStamp(long timeStamp) { } @Override - public QueuedContainersStatus getQueuedContainersStatus() { - return null; - } - - @Override public Integer getDecommissioningTimeout() { return null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index a66b093..02d3956 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -947,7 +947,7 @@ protected void serviceStop() { protected ApplicationMasterService createApplicationMasterService() { if (this.rmContext.getYarnConfiguration().getBoolean( YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, - YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED_DEFAULT)) { + YarnConfiguration.DEFAULT_OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED)) { return new OpportunisticContainerAllocatorAMService(getRMContext(), scheduler) { @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Task.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Task.java index 35218bd..31b372e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Task.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Task.java @@ -31,7 +31,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity .TestUtils; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java index 07c6b54..4ed92f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestOpportunisticContainerAllocatorAMService.java @@ -57,13 +57,27 @@ import 
org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.server.api.protocolrecords.RemoteNode; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.DistributedSchedulingAllocateRequestPBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.DistributedSchedulingAllocateResponsePBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterDistributedSchedulingAMResponsePBImpl; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; - +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext; +import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Assert; import org.junit.Test; +import org.mockito.Mockito; import java.io.IOException; import java.net.InetSocketAddress; @@ -75,11 +89,94 @@ */ public class TestOpportunisticContainerAllocatorAMService { + private static final int GB = 1024; + + @Test(timeout = 60000) + public void testNodeRemovalDuringAllocate() throws Exception { + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + YarnConfiguration conf = new YarnConfiguration(csConf); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + conf.setBoolean( + YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true); + conf.setInt( + YarnConfiguration.NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS, 100); + MockRM rm = new MockRM(conf); + rm.start(); + MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService()); + nm1.registerNode(); + nm2.registerNode(); + OpportunisticContainerAllocatorAMService amservice = + (OpportunisticContainerAllocatorAMService) rm + .getApplicationMasterService(); + RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default"); + ApplicationAttemptId attemptId = + app1.getCurrentAppAttempt().getAppAttemptId(); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2); + ResourceScheduler scheduler = rm.getResourceScheduler(); + RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); + RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId()); + nm1.nodeHeartbeat(true); + nm2.nodeHeartbeat(true); + ((RMNodeImpl) rmNode1) + .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100)); + ((RMNodeImpl) rmNode2) + .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100)); + 
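The test above configures a 100 ms node-sorting interval and then, in the loops below, polls the OpportunisticContainerContext instead of sleeping a fixed amount: up to ten allocate calls, 50 ms apart, before asserting on getNodeMap(). That bounded-poll idiom keeps the test fast when the condition turns true early and deterministic when it is slow; hadoop-common's GenericTestUtils ships a similar waitFor helper. A hypothetical standalone version of the idiom:

import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

final class TestPolling {

  private TestPolling() {
  }

  // Poll until the condition holds or the deadline passes; mirrors the
  // allocate-then-sleep loops in testNodeRemovalDuringAllocate.
  static boolean waitFor(BooleanSupplier condition, long timeoutMs,
      long intervalMs) throws InterruptedException {
    long deadline =
        System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
    while (System.nanoTime() < deadline) {
      if (condition.getAsBoolean()) {
        return true;
      }
      Thread.sleep(intervalMs);
    }
    return condition.getAsBoolean();
  }
}

// Usage in the style of the test, assuming a ctxt variable is in scope:
//   assertTrue(TestPolling.waitFor(() -> ctxt.getNodeMap().size() == 2, 500, 50));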
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler) + .getApplicationAttempt(attemptId).getOpportunisticContainerContext(); + // Send add and update node events to AM Service. + amservice.handle(new NodeAddedSchedulerEvent(rmNode1)); + amservice.handle(new NodeAddedSchedulerEvent(rmNode2)); + amservice.handle(new NodeUpdateSchedulerEvent(rmNode1)); + amservice.handle(new NodeUpdateSchedulerEvent(rmNode2)); + // Both node 1 and node 2 will be applicable for scheduling. + for (int i = 0; i < 10; i++) { + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(1 * GB), 2)), + null); + if (ctxt.getNodeMap().size() == 2) { + break; + } + Thread.sleep(50); + } + Assert.assertEquals(2, ctxt.getNodeMap().size()); + // Remove node from scheduler but not from AM Service. + scheduler.handle(new NodeRemovedSchedulerEvent(rmNode1)); + // After removal of node 1, only 1 node will be applicable for scheduling. + for (int i = 0; i < 10; i++) { + try { + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(1 * GB), 2)), + null); + } catch (Exception e) { + Assert.fail("Allocate request should be handled on node removal"); + } + if (ctxt.getNodeMap().size() == 1) { + break; + } + Thread.sleep(50); + } + Assert.assertEquals(1, ctxt.getNodeMap().size()); + } + + private OpportunisticContainersStatus getOppurtunisticStatus(int waitTime, + int queueLength) { + OpportunisticContainersStatus status1 = + Mockito.mock(OpportunisticContainersStatus.class); + Mockito.when(status1.getEstimatedQueueWaitTime()).thenReturn(waitTime); + Mockito.when(status1.getWaitQueueLength()).thenReturn(queueLength); + return status1; + } + // Test if the OpportunisticContainerAllocatorAMService can handle both // DSProtocol as well as AMProtocol clients @Test public void testRPCWrapping() throws Exception { - Configuration conf = new Configuration(); + final Configuration conf = new Configuration(); conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class .getName()); YarnRPC rpc = YarnRPC.create(conf); @@ -97,6 +194,11 @@ public AMLivelinessMonitor getAMLivelinessMonitor() { public Configuration getYarnConfiguration() { return new YarnConfiguration(); } + + @Override + public RMContainerTokenSecretManager getContainerTokenSecretManager() { + return new RMContainerTokenSecretManager(conf); + } }; Container c = factory.newRecordInstance(Container.class); c.setExecutionType(ExecutionType.OPPORTUNISTIC); @@ -117,8 +219,8 @@ public Configuration getYarnConfiguration() { Server server = service.getServer(rpc, conf, addr, null); server.start(); - // Verify that the DistrubutedSchedulingService can handle vanilla - // ApplicationMasterProtocol clients + // Verify that the OpportunisticContainerAllocatorAMSercvice can handle + // vanilla ApplicationMasterProtocol clients RPC.setProtocolEngine(conf, ApplicationMasterProtocolPB.class, ProtobufRpcEngine.class); ApplicationMasterProtocolPB ampProxy = @@ -184,7 +286,7 @@ public Configuration getYarnConfiguration() { dsProxy.allocateForDistributedScheduling(null, distAllReq.getProto())); Assert.assertEquals( - "h1", dsAllocResp.getNodesForScheduling().get(0).getHost()); + "h1", dsAllocResp.getNodesForScheduling().get(0).getNodeId().getHost()); FinishApplicationMasterResponse dsfinishResp = new FinishApplicationMasterResponsePBImpl( @@ -263,7 +365,8 @@ public AllocateResponse allocate(AllocateRequest request) throws DistributedSchedulingAllocateResponse 
resp = factory .newRecordInstance(DistributedSchedulingAllocateResponse.class); resp.setNodesForScheduling( - Arrays.asList(NodeId.newInstance("h1", 1234))); + Arrays.asList(RemoteNode.newInstance( + NodeId.newInstance("h1", 1234), "http://h1:4321"))); return resp; } }; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java index 0281c19..7255191 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java @@ -36,8 +36,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler - .SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java index 881004c..41079fa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java @@ -35,8 +35,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler - .SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAppSchedulingInfo.java index 7f9c719..468e760 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAppSchedulingInfo.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.junit.Assert; import org.junit.Test; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java index 3cb668c..ff5dc02 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java @@ -41,6 +41,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; + +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.junit.After; import org.junit.Test; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 0aeedce..4b90ad0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -120,7 +120,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import 
org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 37ccdae..e593973 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -77,7 +77,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueStateManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java index 3e05456..c9eb8b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java @@ -60,7 +60,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ResourceCommitRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java index b982fab..e81ffbd 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java @@ -53,7 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestNodeQueueLoadMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestNodeQueueLoadMonitor.java index 5f63923..dfd21ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestNodeQueueLoadMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/distributed/TestNodeQueueLoadMonitor.java @@ -20,7 +20,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.server.api.records.ContainerQueuingLimit; -import org.apache.hadoop.yarn.server.api.records.QueuedContainersStatus; +import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.junit.Assert; import org.junit.Test; @@ -183,13 +183,13 @@ private RMNode createRMNode(String host, int port, RMNode node1 = Mockito.mock(RMNode.class); NodeId nID1 = new FakeNodeId(host, port); Mockito.when(node1.getNodeID()).thenReturn(nID1); - QueuedContainersStatus status1 = - Mockito.mock(QueuedContainersStatus.class); + OpportunisticContainersStatus status1 = + Mockito.mock(OpportunisticContainersStatus.class); Mockito.when(status1.getEstimatedQueueWaitTime()) .thenReturn(waitTime); Mockito.when(status1.getWaitQueueLength()) .thenReturn(queueLength); - Mockito.when(node1.getQueuedContainersStatus()).thenReturn(status1); + Mockito.when(node1.getOpportunisticContainersStatus()).thenReturn(status1); return node1; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java index 5964d2f..4cc99b2 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java @@ -30,7 +30,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java index 61c5743..46187d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java @@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity .TestUtils; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java index 2cbe507..a6ee190 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java @@ -34,13 +34,14 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity .TestUtils; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerPreemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; + +import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.util.ControlledClock; import org.apache.hadoop.yarn.util.resource.Resources; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index 71d077b..cc97674 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -21,6 +21,7 @@ import java.io.PrintWriter; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodesPage.NodesBlock; @@ -49,6 +50,7 @@ // future. In that case this value should be adjusted to the new value. final int numberOfThInMetricsTable = 23; final int numberOfActualTableHeaders = 13; + private final int numberOfThForOpportunisticContainers = 4; private Injector injector; @@ -135,4 +137,35 @@ public void testNodesBlockRenderForNodeLabelFilterWithAnyLabel() { Mockito.verify(writer, Mockito.times(numberOfThInMetricsTable)) .print(" 0) { return new CustomQueueingContainerManagerImpl(context, exec, del, nodeStatusUpdater, metrics, dirsHandler); } else { @@ -847,7 +849,9 @@ public CustomContainerManagerImpl(Context context, ContainerExecutor exec, protected void createAMRMProxyService(Configuration conf) { this.amrmProxyEnabled = conf.getBoolean(YarnConfiguration.AMRM_PROXY_ENABLED, - YarnConfiguration.DEFAULT_AMRM_PROXY_ENABLED); + YarnConfiguration.DEFAULT_AMRM_PROXY_ENABLED) || + conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); if (this.amrmProxyEnabled) { LOG.info("CustomAMRMProxyService is enabled. 
" @@ -864,7 +868,7 @@ protected void createAMRMProxyService(Configuration conf) { } private class CustomQueueingContainerManagerImpl extends - QueuingContainerManagerImpl { + ContainerManagerImpl { public CustomQueueingContainerManagerImpl(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater @@ -874,29 +878,12 @@ public CustomQueueingContainerManagerImpl(Context context, } @Override - protected ContainersMonitor createContainersMonitor(ContainerExecutor - exec) { - return new ContainersMonitorImpl(exec, dispatcher, this.context) { - - @Override - public void increaseContainersAllocation(ProcessTreeInfo pti) { } - - @Override - public void decreaseContainersAllocation(ProcessTreeInfo pti) { } - - @Override - public boolean hasResourcesAvailable( - ContainersMonitorImpl.ProcessTreeInfo pti) { - return true; - } - }; - } - - @Override protected void createAMRMProxyService(Configuration conf) { this.amrmProxyEnabled = conf.getBoolean(YarnConfiguration.AMRM_PROXY_ENABLED, - YarnConfiguration.DEFAULT_AMRM_PROXY_ENABLED); + YarnConfiguration.DEFAULT_AMRM_PROXY_ENABLED) || + conf.getBoolean(YarnConfiguration.DIST_SCHEDULING_ENABLED, + YarnConfiguration.DEFAULT_DIST_SCHEDULING_ENABLED); if (this.amrmProxyEnabled) { LOG.info("CustomAMRMProxyService is enabled. " @@ -910,6 +897,32 @@ protected void createAMRMProxyService(Configuration conf) { LOG.info("CustomAMRMProxyService is disabled"); } } + + @Override + protected ContainersMonitor createContainersMonitor(ContainerExecutor + exec) { + return new ContainersMonitorImpl(exec, dispatcher, this.context) { + @Override + public float getVmemRatio() { + return 2.0f; + } + + @Override + public long getVmemAllocatedForContainers() { + return 16 * 1024L * 1024L * 1024L; + } + + @Override + public long getPmemAllocatedForContainers() { + return 8 * 1024L * 1024L * 1024L; + } + + @Override + public long getVCoresAllocatedForContainers() { + return 10; + } + }; + } } private class ShortCircuitedAMRMProxy extends AMRMProxyService { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/OpportunisticContainers.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/OpportunisticContainers.md new file mode 100644 index 0000000..223930e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/OpportunisticContainers.md @@ -0,0 +1,225 @@ + + +Opportunistic Containers +======================== + +* [Purpose](#Purpose) +* [Quick Guide](#Quick_Guide) + * [Main Goal](#Main_Goal) + * [Enabling Opportunistic Containers](#Enabling_Opportunistic_Containers) + * [Running a Sample Job](Running_a_Sample_Job) + * [Opportunistic Containers in Web UI](Opportunistic_Containers_in_Web_UI) +* [Overview](#Overview) +* [Container Execution Types](#Container_Execution_Types) +* [Execution of Opportunistic Containers](#Execution_of_Opportunistic_Containers) +* [Allocation of Opportunistic Containers](#Allocation_of_Opportunistic_Containers) + * [Centralized Allocation](#Centralized_Allocation) + * [Distributed Allocation](#Distributed_Allocation) + * [Determining Nodes for Allocation](#Determining_Nodes_for_Allocation) + * [Rebalancing Node Load](#Rebalancing_Node_Load) +* [Advanced Configuration](#Advanced_Configuration) +* [Items for Future Work](#Items_for_Future_Work) + + +Purpose +----------------------------- + +This document introduces the notion of **opportunistic** container execution, and discusses how opportunistic containers are allocated and executed. 
+
+
+Quick Guide
+--------------------------------------------------------------------
+
+We start by providing a brief overview of opportunistic containers, including how a user can enable this feature and run a sample job using such containers.
+
+###Main Goal
+
+Unlike existing YARN containers that are scheduled on a node only if there are unallocated resources, opportunistic containers can be dispatched to an NM even if their execution at that node cannot start immediately. In such a case, opportunistic containers will be queued at that NM until resources become available.
+The main goal of opportunistic container execution is to improve cluster resource utilization, and therefore increase task throughput. Resource utilization and task throughput improvements are more pronounced for workloads that include relatively short tasks (on the order of seconds).
+
+
+###Enabling Opportunistic Containers
+
+To enable opportunistic container allocation, the following two properties have to be present in **conf/yarn-site.xml**:
+
+| Property | Description | Default value |
+|:-------- |:----- |:----- |
+| `yarn.resourcemanager.opportunistic-container-allocation.enabled` | Enables opportunistic container allocation. | `false` |
+| `yarn.nodemanager.opportunistic-containers-max-queue-length` | Determines the max number of opportunistic containers that can be queued at an NM. | `0` |
+
+The first parameter above has to be set to `true`. The second one has to be set to a positive value to allow queuing of opportunistic containers at the NM. A value of `10` can be used to start experimenting with opportunistic containers. The optimal value depends on the job characteristics, the cluster configuration, and the target utilization.
+
+By default, allocation of opportunistic containers is performed centrally through the RM. However, a user can choose to enable distributed allocation of opportunistic containers, which can further improve allocation latency for short tasks. Distributed scheduling can be enabled by setting the following parameter to `true` (note that non-opportunistic containers will continue being scheduled through the RM):
+
+| Property | Description | Default value |
+|:-------- |:----- |:----- |
+| `yarn.nodemanager.distributed-scheduling.enabled` | Enables distributed scheduling. | `false` |
+
+
+###Running a Sample Job
+
+The following command can be used to run a sample pi map-reduce job, executing 40% of mappers using opportunistic containers (substitute `3.0.0-alpha2-SNAPSHOT` below with the version of Hadoop you are using):
+```
+$ hadoop jar hadoop-3.0.0-alpha2-SNAPSHOT/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0-alpha2-SNAPSHOT.jar pi -Dmapreduce.job.num-opportunistic-maps-percent="40" 50 100
+```
+
+By changing the value of `mapreduce.job.num-opportunistic-maps-percent` in the above command, we can specify the percentage of mappers that can be executed through opportunistic containers.
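+
+The same settings can also be applied programmatically. The following is a minimal sketch, not a complete job driver; it assumes the `YarnConfiguration` and `MRJobConfig` constants introduced for this feature. The two cluster-side properties would normally live in **conf/yarn-site.xml**, so setting them on a `Configuration` object as shown here is mainly useful for tests or mini-cluster experiments:
+```
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+
+public class OpportunisticSetupSketch {
+  public static Configuration createConf() {
+    Configuration conf = new Configuration();
+    // Enable opportunistic container allocation at the RM.
+    conf.setBoolean(
+        YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true);
+    // Allow up to 10 opportunistic containers to be queued at each NM.
+    conf.setInt(
+        YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10);
+    // Per-job setting: run 40% of the mappers in opportunistic containers.
+    conf.setInt(MRJobConfig.MR_NUM_OPPORTUNISTIC_MAPS_PERCENT, 40);
+    return conf;
+  }
+}
+```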
+
+
+###Opportunistic Containers in Web UI
+
+When opportunistic container allocation is enabled, the following new columns can be observed in the Nodes page of the Web UI (`rm-address:8088/cluster/nodes`):
+* Running Containers (O): Number of running opportunistic containers on each node;
+* Mem Used (O): Total memory used by opportunistic containers on each node;
+* VCores Used (O): Total CPU virtual cores used by opportunistic containers on each node;
+* Queued Containers: Number of containers queued at each node.
+
+When clicking on a specific container running on a node, the execution type of the container is also shown.
+
+In the rest of the document, we provide an in-depth description of opportunistic containers, including details about their allocation and execution.
+
+
+Overview
+--------------------------------
+
+The existing schedulers in YARN (Fair and Capacity Scheduler) allocate containers to a node only if there are unallocated resources at that node at the moment of scheduling the containers. This **guaranteed** type of execution has the advantage that once the AM dispatches a container to a node, the container execution will start immediately, since it is guaranteed that there will be available resources. Moreover, unless fairness or capacity constraints are violated, containers are guaranteed to run to completion without being preempted.
+
+Although this design offers a more predictable task execution, it has two main drawbacks that can lead to suboptimal cluster resource utilization:
+
+* **Feedback delays.** When a container finishes its execution at a node, the RM gets notified that there are available resources through the next NM-RM heartbeat, then the RM schedules a new container at that node, the AM gets notified through the next AM-RM heartbeat, and finally the AM launches the new container at the node. These delays result in idle node resources, which in turn lead to lower resource utilization, especially when workloads involve tasks whose duration is relatively short.
+* **Allocated vs. utilized resources.** The RM allocates containers based on the *allocated* resources at each node, which might be significantly higher than the actually *utilized* resources (e.g., think of a container for which 4GB of memory has been allocated, but only 2GB is being utilized). This lowers effective resource utilization, and can be avoided if the RM takes into account the utilized resources during scheduling. However, this has to be done in a way that allows resources to be reclaimed in case the utilized resources of a running container increase.
+
+To mitigate the above problems, in addition to the existing containers (which we term **guaranteed** containers hereafter), we introduce the notion of **opportunistic** containers. An opportunistic container can be dispatched to an NM, even if there are no available (unallocated) resources for it at the moment of scheduling. In such a case, the opportunistic container will be queued at the NM, waiting for resources to become available for its execution to start. Opportunistic containers are of lower priority than guaranteed ones, which means that they can be preempted for guaranteed containers to start their execution. Therefore, they can be used to improve cluster resource utilization without impacting the execution of existing guaranteed containers.
+
+An additional advantage of opportunistic containers is that they introduce a notion of **execution priority at the NMs**. For instance, a lower-priority job that does not require strict execution guarantees can use opportunistic containers or a mix of container execution types for its tasks.
+
+We have introduced two ways of allocating opportunistic containers: a **centralized** and a **distributed** one. In centralized scheduling, opportunistic containers are allocated through the YARN RM, whereas in distributed scheduling they are allocated through local schedulers that reside at each NM.
+Centralized allocation allows for higher-quality placement decisions and for implementing more involved sharing policies across applications (e.g., fairness). On the other hand, distributed scheduling can offer faster container allocation, which is useful for short tasks, as it avoids the round-trip to the RM. In both cases, the scheduling of guaranteed containers remains intact and happens through the YARN RM (using the existing Fair or Capacity Scheduler).
+
+Note that in the current implementation, we are allocating containers based on allocated (and not utilized) resources. Therefore, we tackle the "feedback delays" problem mentioned above, but not the "allocated vs. utilized resources" one. There is ongoing work (`YARN-1011`) that employs opportunistic containers to address the latter problem too.
+
+Below, we describe in more detail the [container execution types](#Container_Execution_Types), as well as the [execution](#Execution_of_Opportunistic_Containers) (including the container queuing at the NMs) and [allocation](#Allocation_of_Opportunistic_Containers) of opportunistic containers. Then we discuss how to fine-tune opportunistic containers through some [advanced configuration parameters](#Advanced_Configuration). Finally, we discuss open items for [future work](#Items_for_Future_Work).
+
+
+Container Execution Types
+-----------------------------------------------------------------
+
+We introduce the following two types of containers:
+
+* **Guaranteed containers** correspond to the existing YARN containers. They are allocated by the Fair or Capacity Scheduler, and once dispatched to a node, it is guaranteed that there are available resources for their execution to start immediately. Moreover, these containers run to completion (as long as there are no failures). They can be preempted only in case the scheduler queue to which they belong violates fairness or capacity constraints.
+* **Opportunistic containers** are not guaranteed to have resources for their execution to start when they get dispatched to a node. Instead, they might be queued at the NM until resources become available. In case a guaranteed container arrives at a node and there are no resources available for it, one or more opportunistic containers will be preempted to execute the guaranteed one.
+
+When an AM submits its resource requests to the RM, it specifies the type for each container (the default is guaranteed), determining the way the container will be [allocated](#Allocation_of_Opportunistic_Containers). Subsequently, when the container is launched by the AM at an NM, its type determines how it will be [executed](#Execution_of_Opportunistic_Containers) by the NM.
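+
+As a rough illustration of how an AM can mark a request as opportunistic, the sketch below attaches an `ExecutionTypeRequest` when building a `ResourceRequest`. This is a minimal example, not taken from an actual AM; the priority, capability, and locality values are arbitrary:
+```
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+
+public class OpportunisticRequestSketch {
+  // Ask for 5 opportunistic containers of 1GB / 1 vcore, anywhere in the cluster.
+  public static ResourceRequest newOpportunisticRequest() {
+    return ResourceRequest.newInstance(
+        Priority.newInstance(10),       // application-defined priority
+        ResourceRequest.ANY,            // no locality constraint
+        Resource.newInstance(1024, 1),  // 1024 MB, 1 vcore
+        5,                              // number of containers
+        true,                           // relax locality
+        null,                           // no node-label expression
+        ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true));
+  }
+}
+```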
+
+
+Execution of Opportunistic Containers
+---------------------------------------------------------------------------
+
+When a container arrives at an NM, its execution is determined by the available resources at the NM and the container type. Guaranteed containers start their execution immediately, and if needed, the NM will kill running opportunistic containers to ensure there are sufficient resources for the guaranteed ones to start. On the other hand, opportunistic containers can be queued at the NM if there are no resources available to start their execution when they arrive at the NM. To enable this, we extended the NM by allowing queuing of containers at each node. The NM monitors the local resources, and when there are sufficient resources available, it starts the execution of the opportunistic container that is at the head of the queue.
+
+In particular, when a container arrives at an NM, localization is performed (i.e., all required resources are downloaded), and then the container moves to a `SCHEDULED` state, in which the container is queued, waiting for its execution to begin:
+
+* If there are available resources, the execution of the container starts immediately, irrespective of its execution type.
+* If there are no available resources:
+    * If the container is guaranteed, we kill as many running opportunistic containers as required for the guaranteed container to be executed, and then start its execution.
+    * If the container is opportunistic, it remains in the queue until resources become available.
+* When a container (guaranteed or opportunistic) finishes its execution and resources get freed up, we examine the queued containers and, if there are available resources, we start their execution. We pick containers from the queue in FIFO order.
+
+In the [future work items](#Items_for_Future_Work) below, we discuss different ways of prioritizing task execution (queue reordering) and of killing opportunistic containers to make space for guaranteed ones.
+
+
+Allocation of Opportunistic Containers
+-----------------------------------------------------------------------------
+
+As mentioned above, we provide both a centralized and a distributed way of allocating opportunistic containers, which we describe below.
+
+###Centralized Allocation
+
+We have introduced a new service at the RM, namely the `OpportunisticContainerAllocatorAMService`, which extends the `ApplicationMasterService`. When centralized opportunistic allocation is enabled, the resource requests from the AMs are served at the RM side by the `OpportunisticContainerAllocatorAMService`, which splits them into two sets of resource requests:
+
+* The guaranteed set is forwarded to the existing `ApplicationMasterService` and is subsequently handled by the Fair or Capacity Scheduler.
+* The opportunistic set is handled by the new `OpportunisticContainerAllocator`, which performs the scheduling of opportunistic containers to nodes.
+
+The `OpportunisticContainerAllocator` maintains a list of the [least loaded nodes](#Determining_Nodes_for_Allocation) of the cluster at each moment, and assigns containers to them in a round-robin fashion, as sketched below. Note that in the current implementation, we purposely do not take into account node locality constraints. Since an opportunistic container (unlike the guaranteed ones) might wait in the queue of an NM before its execution starts, it is more important to allocate it at a node that is less loaded (i.e., where queuing delay will be smaller) than to respect its locality constraints. Moreover, we do not take into account sharing (fairness/capacity) constraints for opportunistic containers at the moment. Support for both locality and sharing constraints can be added in the future if required.
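+
+The following is a simplified sketch of the round-robin assignment idea (with hypothetical names, not the actual `OpportunisticContainerAllocator` code): containers are placed on the current list of least loaded nodes in circular order.
+```
+import java.util.List;
+
+public class RoundRobinSketch {
+  private int next = 0;
+
+  // Pick the node for the next opportunistic container from the list of
+  // least loaded nodes (a list refreshed periodically by the RM).
+  public String pickNode(List<String> leastLoadedNodes) {
+    String node = leastLoadedNodes.get(next % leastLoadedNodes.size());
+    next = (next + 1) % leastLoadedNodes.size();
+    return node;
+  }
+}
+```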
+
+
+###Distributed Allocation
+
+In order to enable distributed scheduling of opportunistic containers, we have introduced a new service at each NM, called `AMRMProxyService`. The `AMRMProxyService` implements the `ApplicationMasterService` protocol, and acts as a proxy between the AMs running at that node and the RM. When the `AMRMProxyService` is enabled (through a parameter), we force all AMs running at a particular node to communicate with the `AMRMProxyService` of the same node, instead of going directly to the RM. Moreover, to ensure that the AMs will not talk directly to the RM, when a new AM gets initialized, we replace its `AMRMToken` with a token signed by the `AMRMProxyService`.
+
+A chain of interceptors can be registered with the `AMRMProxyService`. One of these interceptors is the `DistributedScheduler`, which is responsible for allocating opportunistic containers in a distributed way, without needing to contact the RM. This modular design makes the `AMRMProxyService` instrumental in other scenarios too, such as YARN federation (`YARN-2915`) or throttling down misbehaving AMs, which can be enabled simply by adding additional interceptors to the interceptor chain.
+
+When distributed opportunistic scheduling is enabled, each AM sends its resource requests to the `AMRMProxyService` running at the same node. The `AMRMProxyService` splits the resource requests into two sets:
+
+* The guaranteed set is forwarded to the RM. In this case the `AMRMProxyService` simply acts as a proxy between the AM and the RM, and the container allocation remains intact (using the Fair or Capacity Scheduler).
+* The opportunistic set is not forwarded to the RM. Instead, it is handled by the `DistributedScheduler` that is running locally at the node. In particular, the `DistributedScheduler` maintains a list of the least loaded nodes in the cluster, and allocates containers to them in a round-robin fashion. The RM informs the `DistributedScheduler` about the least loaded nodes at regular intervals through the NM-RM heartbeats.
+
+The above procedure is similar to the one performed by the `OpportunisticContainerAllocatorAMService` in the case of centralized opportunistic scheduling described above. The main difference is that in the distributed case, the splitting of requests into guaranteed and opportunistic happens locally at the node, and only the guaranteed requests are forwarded to the RM, while the opportunistic ones are handled without contacting the RM.
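+
+Conceptually, the splitting performed in both the centralized and the distributed case looks like the following sketch (a hypothetical helper, not the actual `DistributedScheduler` code), keyed on the execution type carried by each request:
+```
+import java.util.List;
+
+import org.apache.hadoop.yarn.api.records.ExecutionType;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+
+public class RequestSplitSketch {
+  // Partition the AM's asks: guaranteed ones are forwarded to the RM,
+  // opportunistic ones are handled by the opportunistic allocator.
+  public static void split(List<ResourceRequest> asks,
+      List<ResourceRequest> guaranteed, List<ResourceRequest> opportunistic) {
+    for (ResourceRequest ask : asks) {
+      if (ask.getExecutionTypeRequest() != null
+          && ask.getExecutionTypeRequest().getExecutionType()
+              == ExecutionType.OPPORTUNISTIC) {
+        opportunistic.add(ask);
+      } else {
+        guaranteed.add(ask);
+      }
+    }
+  }
+}
+```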
+
+
+###Determining Nodes for Allocation
+
+Each NM informs the RM periodically through the NM-RM heartbeats about the number of running guaranteed and opportunistic containers, as well as the number of queued opportunistic containers. The RM gathers this information from all nodes and determines the least loaded ones.
+
+In the case of centralized allocation of opportunistic containers, this information is immediately available, since the allocation happens centrally. In the case of distributed scheduling, the list of least loaded nodes is propagated to all NMs (and thus becomes available to the `DistributedSchedulers`) through the heartbeat responses from the RM to the NMs. The number of least loaded nodes sent to the NMs is configurable.
+
+At the moment, we take into account only the number of queued opportunistic containers at each node in order to estimate the time an opportunistic container would have to wait if sent to that node and, thus, determine the least loaded nodes. If the AM provided us with information about the estimated task durations, we could take them into account in order to have better estimates of the queue waiting times.
+
+
+###Rebalancing Node Load
+
+Occasionally, poor placement choices for opportunistic containers may be made (due to stale queue length estimates), which can lead to load imbalance between nodes. The problem is more pronounced under high cluster load, and also in the case of distributed scheduling (multiple `DistributedSchedulers` may place containers at the same NM, since they do not coordinate with each other). To deal with this load imbalance between the NM queues, we perform load shedding to dynamically re-balance the load between NMs. In particular, when the RM aggregates the queue time estimates published by the NMs, it constructs a distribution and finds a targeted maximal value for the length of the NM queues (based on the mean and standard deviation of the distribution). The RM then disseminates this value to the various NMs through the heartbeat responses. Subsequently, using this information, an NM whose queue length is above the threshold discards opportunistic containers to meet this maximal value. This forces the associated individual AMs to reschedule those containers elsewhere.
+
+
+Advanced Configuration
+--------------------------------------------------
+
+The main properties for enabling opportunistic container allocation and choosing between centralized and distributed allocation were described in the [quick guide](#Quick_Guide) at the beginning of this document. Here we present more advanced configuration. Note that using the default values for those parameters should be sufficient in most cases. All parameters below have to be defined in the **conf/yarn-site.xml** file.
+
+To determine the number of [least loaded nodes](#Determining_Nodes_for_Allocation) that will be used when scheduling opportunistic containers, as well as how often this list will be refreshed, we use the following parameters:
+
+| Property | Description | Default value |
+|:-------- |:----- |:----- |
+| `yarn.resourcemanager.opportunistic-container-allocation.nodes-used` | Number of least loaded nodes to be used by the Opportunistic Container allocator for dispatching containers during container allocation. A higher value can improve load balance in large clusters. | `10` |
+| `yarn.resourcemanager.nm-container-queuing.sorting-nodes-interval-ms` | Frequency for computing least loaded nodes. | `1000` |
+
+
+As discussed in the [node load rebalancing](#Rebalancing_Node_Load) section above, at regular intervals the RM gathers all NM queue lengths and computes their mean value (`avg`) and standard deviation (`stdev`), as well as the value `avg + k*stdev` (where `k` is a float). This value gets propagated through the NM-RM heartbeats to all NMs, which should respect that value by dequeuing containers (if required), as long as their current queue length is between a `queue_min_length` and a `queue_max_length` value (these values are used to avoid dequeuing tasks from very short queues and to aggressively dequeue tasks from long queues, respectively); the computation is sketched after the table below.
+The parameters `k`, `queue_min_length`, and `queue_max_length` can be specified as follows:
+
+| Property | Description | Default value |
+|:-------- |:----- |:----- |
+| `yarn.resourcemanager.nm-container-queuing.queue-limit-stdev` | The `k` parameter. | `1.0f` |
+| `yarn.resourcemanager.nm-container-queuing.min-queue-length` | The `queue_min_length` parameter. | `5` |
+| `yarn.resourcemanager.nm-container-queuing.max-queue-length` | The `queue_max_length` parameter. | `15` |
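+
+For illustration, the queue-limit computation described above can be sketched as follows (a hypothetical helper, not the actual RM code):
+```
+public final class QueueLimitSketch {
+  // threshold = avg + k * stdev, clamped to [queueMinLength, queueMaxLength].
+  public static int computeQueueLimit(int[] queueLengths, float k,
+      int queueMinLength, int queueMaxLength) {
+    double avg = 0;
+    for (int len : queueLengths) {
+      avg += len;
+    }
+    avg /= queueLengths.length;
+
+    double var = 0;
+    for (int len : queueLengths) {
+      var += (len - avg) * (len - avg);
+    }
+    double stdev = Math.sqrt(var / queueLengths.length);
+
+    int limit = (int) (avg + k * stdev);
+    // Avoid shedding from very short queues; aggressively cap very long ones.
+    return Math.max(queueMinLength, Math.min(queueMaxLength, limit));
+  }
+}
+```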
+
+
+Finally, two more properties can further tune the `AMRMProxyService` in case distributed scheduling is used:
+
+| Property | Description | Default value |
+|:-------- |:----- |:----- |
+| `yarn.nodemanager.amrmproxy.address` | The address/port to which the `AMRMProxyService` is bound. | `0.0.0.0:8049` |
+| `yarn.nodemanager.amrmproxy.client.thread-count` | The number of threads that are used at each NM for serving the interceptors registered to the `AMRMProxyService` by different jobs. | `3` |
+
+
+Items for Future Work
+-----------------------------------------------
+
+Here we describe multiple ways in which we can extend/enhance the allocation and execution of opportunistic containers. We also provide the JIRAs that track each item.
+
+* **Resource overcommitment** (`YARN-1011`). As already discussed, in order to further improve cluster resource utilization, we can schedule containers based not on the allocated resources but on the actually utilized ones. When over-committing resources, there is the risk of running out of resources in case the utilized resources of the already running containers increase. Therefore, opportunistic execution should be used for containers whose allocation goes beyond the capacity of a node. This way, we can choose opportunistic containers to kill for reclaiming resources.
+* **NM Queue reordering** (`YARN-5886`). Instead of executing queued containers in FIFO order, we can employ reordering strategies that dynamically determine which opportunistic container will be executed next. For example, we can prioritize containers that are expected to be short-running or that belong to applications that are close to completion.
+* **Out of order killing at NMs** (`YARN-5887`). As described above, when we need to free up resources for a guaranteed container to start its execution, we kill opportunistic containers in reverse order of arrival (the most recently started ones first). This might not always be the right decision. For example, we might want to minimize the number of containers killed, or to refrain from killing containers of jobs that are very close to completion.
+* **Container pausing** (`YARN-5292`). At the moment we kill opportunistic containers to make room for guaranteed ones in case of resource contention. In busy clusters this can lower the effective cluster utilization: whenever we kill a running opportunistic container, it has to be restarted, and thus we lose work. To this end, we can instead pause running opportunistic containers. Note that this will require support from the container executor (e.g., the container technology used) and from the application.
+* **Container promotion** (`YARN-5085`). There are cases where changing the execution type of a container during its execution can be beneficial. For instance, an application might submit a container as opportunistic, and when its execution starts, request its promotion to a guaranteed container to avoid getting killed.