diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java index b4d4a809bb6..ea6c7cfa361 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java @@ -918,6 +918,7 @@ private void printSimulationInfo() { nodeManagerResource.getResourceValue(ResourceInformation.VCORES_URI)); simulateInfoMap.put("Number of applications", numAMs); simulateInfoMap.put("Number of tasks", numTasks); + //TODO typo simulateInfoMap.put("Average tasks per applicaion", (int)(Math.ceil((numTasks + 0.0) / numAMs))); simulateInfoMap.put("Number of queues", queueAppNumMap.size()); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java index 1330e4d2f2b..e7f8008a03d 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java @@ -99,7 +99,7 @@ protected long traceStartTimeMS; protected long traceFinishTimeMS; protected long simulateStartTimeMS; - protected long simulateFinishTimeMS; + protected long simulateFinishTimeMS = 0; // whether tracked in Metrics protected boolean isTracked; // progress @@ -221,7 +221,18 @@ public void middleStep() throws Exception { @Override public void lastStep() throws Exception { - LOG.info("Application {} is shutting down.", appId); + if (simulateFinishTimeMS != 0) { + // The finish time is already recorded. + // Different value from zero means lastStep was called before. + // We want to prevent lastStep to be called more than once. + // See YARN-10427 for more details. + LOG.warn("Method AMSimulator#lastStep was already called. " + + "Skipping execution of method for application: {}", appId); + return; + } + + LOG.info(String.format("Application %s is shutting down. lastStep " + + "Stacktrace", appId), new Exception()); // unregister tracking if (isTracked) { untrackApp(); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 32567db666e..476065b5838 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode .UpdatedContainerInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @Private @Unstable @@ -54,6 +56,7 @@ public static NodeId newNodeID(String host, int port) { return NodeId.newInstance(host, port); } + private static final Logger LOG = LoggerFactory.getLogger(NodeInfo.class); @Private @Unstable @@ -160,6 +163,7 @@ public void resetLastNodeHeartBeatResponse() { } public List pullContainerUpdates() { + LOG.info("***FakeRMNodeImpl#pullContainerUpdates"); ArrayList list = new ArrayList(); ArrayList list2 = new ArrayList(); @@ -171,6 +175,7 @@ public void resetLastNodeHeartBeatResponse() { new ArrayList>(); list.add(new UpdatedContainerInfo(new ArrayList(), list2, needUpdateContainers)); + LOG.info("***FakeRMNodeImpl#pullContainerUpdates, last list item: " + list.get(list.size() - 1)); return list; } @@ -253,6 +258,24 @@ public long calculateHeartBeatInterval( float speedupFactor, float slowdownFactor) { return defaultInterval; } + + @Override + public String toString() { + return "FakeRMNodeImpl{" + + "nodeId=" + nodeId + + ", hostName='" + hostName + '\'' + + ", nodeAddr='" + nodeAddr + '\'' + + ", httpAddress='" + httpAddress + '\'' + + ", cmdPort=" + cmdPort + + ", perNode=" + perNode + + ", rackName='" + rackName + '\'' + + ", healthReport='" + healthReport + '\'' + + ", state=" + state + + ", toCleanUpContainers=" + toCleanUpContainers + + ", toCleanUpApplications=" + toCleanUpApplications + + ", runningApplications=" + runningApplications + + '}'; + } } public static RMNode newNodeInfo(String rackName, String hostName, diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index b5ae4f5b3c0..3f2e47cd595 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -36,6 +36,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode .UpdatedContainerInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.Collections; import java.util.List; @@ -48,10 +50,13 @@ private RMNode node; private List updates; private boolean pulled = false; + private static final Logger LOG = LoggerFactory.getLogger(RMNodeWrapper.class); public RMNodeWrapper(RMNode node) { this.node = node; updates = node.pullContainerUpdates(); + LOG.info("***RMNodeWrapper constructor. updates: " + updates); + LOG.info("RMNode instance: " + node.getClass()); } @Override @@ -147,11 +152,16 @@ public void resetLastNodeHeartBeatResponse() { @Override public List pullContainerUpdates() { + LOG.info("****RMNodeWrapper:: Pulling container updates"); List list = Collections.emptyList(); if (! pulled) { + LOG.info("***RMNodeWrapper#pullContainerUpdates, pulled=false"); list = updates; pulled = true; + } else { + LOG.info("***RMNodeWrapper#pullContainerUpdates, pulled=true"); } + LOG.info("***Resulted updates: " + list); return list; } @@ -238,4 +248,13 @@ public long calculateHeartBeatInterval( float speedupFactor, float slowdownFactor) { return defaultInterval; } + + @Override + public String toString() { + return "RMNodeWrapper{" + + "node=" + node + + ", updates=" + updates + + ", pulled=" + pulled + + '}'; + } } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java index 19cfe88d1ab..7c17aefd1cd 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/TaskRunner.java @@ -16,7 +16,9 @@ * limitations under the License. */ package org.apache.hadoop.yarn.sls.scheduler; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.yarn.sls.appmaster.AMSimulator; import java.text.MessageFormat; import java.util.Queue; import java.util.concurrent.DelayQueue; @@ -33,6 +35,7 @@ @Private @Unstable public abstract static class Task implements Runnable, Delayed { + private static final Logger LOG = LoggerFactory.getLogger(Task.class); private long start; private long end; private long nextRun; @@ -45,6 +48,7 @@ public Task(){} //values in milliseconds, start/end are milliseconds from now public void init(long startTime, long endTime, long repeatInterval) { + logAmSimulatorInit(); if (endTime - startTime < 0) { throw new IllegalArgumentException(MessageFormat.format( "endTime[{0}] cannot be smaller than startTime[{1}]", endTime, @@ -66,6 +70,7 @@ public void init(long startTime, long endTime, long repeatInterval) { } private void timeRebase(long now) { + logAmSimulatorTimeRebase(now, startTime, now + start, endTime, now + end, this.nextRun, now + start); startTime = now + start; endTime = now + end; this.nextRun = startTime; @@ -85,16 +90,19 @@ private void setQueue(Queue queue) { public final void run() { try { if (nextRun == startTime) { + logAmSimulatorStep("firstStep"); firstStep(); nextRun += repeatInterval; if (nextRun <= endTime) { queue.add(this); } } else if (nextRun < endTime) { + logAmSimulatorStep("middleStep"); middleStep(); nextRun += repeatInterval; queue.add(this); } else { + logAmSimulatorStep("lastStep"); lastStep(); } } catch (Exception e) { @@ -104,6 +112,41 @@ public final void run() { } } + private void logAmSimulatorStep(String step) { + if (this instanceof AMSimulator) { + AMSimulator amsim = ((AMSimulator) this); + LOG.debug("Executing step: {} for application: {}, AMSimulator details: {}", step, amsim.getApplicationId(), getAmSimulatorDetails()); + } + } + + private void logAmSimulatorInit() { + if (this instanceof AMSimulator) { + AMSimulator amsim = ((AMSimulator) this); + LOG.debug("Executing init for application: {}, AMSimulator details: {}", amsim.getApplicationId(), getAmSimulatorDetails()); + } + } + + private void logAmSimulatorTimeRebase(long now, long startTimeCurr, long startTimeNext, + long endTimeCurr, long endTimeNext, long nextRunCurr, long nextRunNext) { + if (this instanceof AMSimulator) { + AMSimulator amsim = ((AMSimulator) this); + LOG.debug("Executing TimeRebase for application: {}, now: {}, startTimeCurr: {}, startTimeNext: {}, " + + "endTimeCurr: {}, endTimeNext: {}, nextRunCurr: {}, nextRunNext: {}", + amsim.getApplicationId(), now, startTimeCurr, startTimeNext, endTimeCurr, endTimeNext, nextRunCurr, nextRunNext); + } + } + + + private String getAmSimulatorDetails() { + if (this instanceof AMSimulator) { + AMSimulator amsim = ((AMSimulator) this); + return String.format("AMSIM details for application: %s, --> start: %d, end: %d, nextrun: %d, starttime: %d, endTime:" + + " %d, repeatInterval: %d", + amsim.getApplicationId(), start, end, nextRun, startTime, endTime, repeatInterval); + } + return ""; + } + @Override public long getDelay(TimeUnit unit) { return unit.convert(nextRun - System.currentTimeMillis(), diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/web/TestSLSWebApp.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/web/TestSLSWebApp.java index c3f66a0b74d..22c5a49b6df 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/web/TestSLSWebApp.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/web/TestSLSWebApp.java @@ -44,6 +44,7 @@ public void testSimulateInfoPageHtmlTemplate() throws Exception { simulateInfoMap.put("Node VCores", 1); simulateInfoMap.put("Number of applications", 100); simulateInfoMap.put("Number of tasks", 1000); + //TODO typo simulateInfoMap.put("Average tasks per applicaion", 10); simulateInfoMap.put("Number of queues", 4); simulateInfoMap.put("Average applications per queue", 25); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/UpdatedContainerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/UpdatedContainerInfo.java index d55db68b257..5f5c7a79200 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/UpdatedContainerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/UpdatedContainerInfo.java @@ -52,4 +52,12 @@ public UpdatedContainerInfo(List newlyLaunchedContainers, public List> getUpdateContainers() { return this.updateContainers; } + + @Override + public String toString() { + return "UpdatedContainerInfo{" + + "newlyLaunchedContainers=" + newlyLaunchedContainers + + ", completedContainers=" + completedContainers + + '}'; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 41442363711..25225fd5064 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -372,6 +372,7 @@ protected void containerLaunchedOnNode( ContainerId containerId, SchedulerNode node) { readLock.lock(); try { + LOG.info("Container launched. container: " + containerId + ", node: " + node); // Get the application for the finished container SchedulerApplicationAttempt application = getCurrentAttemptForContainer(containerId); @@ -1033,7 +1034,9 @@ public SchedulerNode getNode(NodeId nodeId) { */ private List updateNewContainerInfo(RMNode nm, SchedulerNode schedulerNode) { + LOG.info("***AbstractYarnScheduler#updateNewContainerInfo: nm class: " + nm.getClass() + ", NM object: " + nm.toString()); List containerInfoList = nm.pullContainerUpdates(); + LOG.info("***After nm.pullContainerUpdates::: containerInfoList: " + containerInfoList); List newlyLaunchedContainers = new ArrayList<>(); List completedContainers = @@ -1042,11 +1045,15 @@ public SchedulerNode getNode(NodeId nodeId) { new ArrayList<>(); for(UpdatedContainerInfo containerInfo : containerInfoList) { + LOG.info("***Adding newly launched containers: " + containerInfo.getNewlyLaunchedContainers()); newlyLaunchedContainers .addAll(containerInfo.getNewlyLaunchedContainers()); completedContainers.addAll(containerInfo.getCompletedContainers()); updateExistContainers.addAll(containerInfo.getUpdateContainers()); } + + LOG.info("***completedContainers: " + completedContainers); + LOG.info("***newlyLaunchedContainers: " + newlyLaunchedContainers); // Processing the newly launched containers for (ContainerStatus launchedContainer : newlyLaunchedContainers) {