diff --git llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java index 6fa3107..efd774d 100644 --- llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java +++ llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java @@ -870,6 +870,9 @@ private void disableInstance(ServiceInstance instance, boolean isCommFailure) { if (metrics != null) { metrics.setDisabledNodeCount(disabledNodesQueue.size()); } + // Trigger a scheduling run - in case there's some task which was waiting for this node to + // become available. + trySchedulingPendingTasks(); } } finally { writeLock.unlock(); diff --git llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskSchedulerService.java llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskSchedulerService.java index afbab95..402658b 100644 --- llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskSchedulerService.java +++ llap-tez/src/test/org/apache/hadoop/hive/llap/tezplugins/TestLlapTaskSchedulerService.java @@ -773,7 +773,7 @@ public void testForcedLocalityMultiplePreemptionsSameHost2() throws IOException, // At this point. one p=2 task and task3(p=1) running. Ask for another p1 task. while (true) { tsWrapper.signalSchedulerRun(); - tsWrapper.awaitSchedulerRun(); + tsWrapper.awaitSchedulerRun(1000l); if (tsWrapper.ts.dagStats.numPreemptedTasks == 2) { break; } @@ -1174,6 +1174,9 @@ public void testDelayedLocalityNodeCommErrorImmediateAllocation() throws IOExcep assertEquals(task2, argumentCaptor.getAllValues().get(1)); reset(tsWrapper.mockAppCallback); + // Flush any pending scheduler runs which may be blocked. Wait 2 seconds for the run to complete. + tsWrapper.signalSchedulerRun(); + tsWrapper.awaitSchedulerRun(2000l); // Mark a task as failed due to a comm failure. tsWrapper.deallocateTask(task1, false, TaskAttemptEndReason.COMMUNICATION_ERROR);