diff --git llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java index e5dc378f62..3780e5dc98 100644 --- llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java +++ llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java @@ -770,7 +770,7 @@ void nodePinged(String hostname, String uniqueId, int port, } attempts.put((TezTaskAttemptID)tasks.get()[i], isGuaranteed); } - String error = ""; + Set error = new HashSet<>(); synchronized (biMap) { for (Map.Entry entry : biMap.entrySet()) { // TODO: this is a stopgap fix. We really need to change all mappings by unique node ID, @@ -788,7 +788,9 @@ void nodePinged(String hostname, String uniqueId, int port, getContext().taskAlive(attemptId); scheduler.taskInfoUpdated(attemptId, isGuaranteed.booleanValue()); } else { - error += (attemptId + ", "); + if (scheduler.isInitialGuaranteed(attemptId)) { + error.add(attemptId); + } } getContext().containerAlive(entry.getKey()); } @@ -796,10 +798,10 @@ void nodePinged(String hostname, String uniqueId, int port, if (!error.isEmpty()) { LOG.info("The tasks we expected to be on the node are not there: " + error); - for (Map.Entry entry : biMap.entrySet()) { + for (TezTaskAttemptID attempt : error) { LOG.info("Sending a kill for attempt {}, due to a ping from node with same host and same port but " + - "registered with different unique ID", entry.getValue()); - getContext().taskKilled(entry.getValue(), TaskAttemptEndReason.NODE_FAILED, + "registered with different unique ID", attempt); + getContext().taskKilled(attempt, TaskAttemptEndReason.NODE_FAILED, "Node with same host and port but with new unique ID pinged"); } }