Replication queue failover patch (against revision 1456319).

ReplicationZookeeper (copyQueuesFromRSUsingMulti hunks):
  * When listing the children of the dead region server's znode yields null,
    return `queues` instead of null.
  * When a KeeperException is caught, clear `queues` before returning it, so
    nothing collected before the failure is handed back to the caller.

ReplicationSourceManager:
  * Add a java.util.Random field, created alongside sleepBeforeFailover.
    It is declared final because it is assigned exactly once, matching the
    neighbouring final fields.
  * Sleep for sleepBeforeFailover plus a random extra of up to one more
    sleepBeforeFailover before transferring queues, instead of a fixed
    sleepBeforeFailover.

Index: src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java	(revision 1456319)
+++ src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java	(working copy)
@@ -672,7 +672,7 @@
     List listOfOps = new ArrayList();
     try {
       peerIdsToProcess = ZKUtil.listChildrenNoWatch(this.zookeeper, deadRSZnodePath);
-      if (peerIdsToProcess == null) return null; // node already processed
+      if (peerIdsToProcess == null) return queues; // node already processed
       for (String peerId : peerIdsToProcess) {
         String newPeerId = peerId + "-" + znode;
         String newPeerZnode = ZKUtil.joinZNode(this.rsServerNameZnode, newPeerId);
@@ -707,6 +707,7 @@
     } catch (KeeperException e) {
       // Multi call failed; it looks like some other regionserver took away the logs.
       LOG.warn("Got exception in copyQueuesFromRSUsingMulti: ", e);
+      queues.clear();
     }
     return queues;
   }
Index: src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java	(revision 1456319)
+++ src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java	(working copy)
@@ -26,6 +26,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 import java.util.SortedMap;
 import java.util.SortedSet;
 import java.util.TreeSet;
@@ -89,6 +90,7 @@
   private final long sleepBeforeFailover;
   // Homemade executer service for replication
   private final ThreadPoolExecutor executor;
+  private final Random random;
 
   /**
    * Creates a replication manager and sets the watch on all the other
@@ -119,6 +121,7 @@
     this.logDir = logDir;
     this.oldLogDir = oldLogDir;
     this.sleepBeforeFailover = conf.getLong("replication.sleep.before.failover", 2000);
+    this.random = new Random();
     this.zkHelper.registerRegionServerListener(
         new OtherRegionServerWatcher(this.zkHelper.getZookeeperWatcher()));
     this.zkHelper.registerRegionServerListener(
@@ -572,7 +575,7 @@
       // Wait a bit before transferring the queues, we may be shutting down.
       // This sleep may not be enough in some cases.
       try {
-        Thread.sleep(sleepBeforeFailover);
+        Thread.sleep(sleepBeforeFailover + (long)(random.nextFloat()*sleepBeforeFailover));
       } catch (InterruptedException e) {
         LOG.warn("Interrupted while waiting before transferring a queue.");
         Thread.currentThread().interrupt();