diff --git src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java index 7b7316f..0d36983 100644 --- src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java +++ src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java @@ -324,6 +324,7 @@ public class SplitLogManager extends ZooKeeperListener { LOG.warn("Error splitting " + path); } } + boolean safeToDeleteNode = true; Task task = tasks.get(path); if (task == null) { if (!ZKSplitLog.isRescanNode(watcher, path)) { @@ -341,6 +342,7 @@ public class SplitLogManager extends ZooKeeperListener { if (status == SUCCESS) { task.batch.done++; } else { + safeToDeleteNode = false; task.batch.error++; } task.batch.notify(); @@ -351,8 +353,12 @@ public class SplitLogManager extends ZooKeeperListener { // delete the task node in zk. Keep trying indefinitely - its an async // call and no one is blocked waiting for this node to be deleted. All // task names are unique (log.) there is no risk of deleting - // a future task. - deleteNode(path, Long.MAX_VALUE); + // a future task. This is true if the task status is SUCCESS, if not, + // it will race against split log retry. It will be safer to leave the + // node there if the task has failed and it is not an orphan. + if (safeToDeleteNode) { + deleteNode(path, Long.MAX_VALUE); + } return; }