diff --git src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 312a211..f0d260e 100644 --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -1681,6 +1681,8 @@ public class AssignmentManager extends ZooKeeperListener { boolean hijack) { boolean regionAlreadyInTransitionException = false; boolean serverNotRunningYet = false; + boolean socketTimeoutException = false; + long maxRegionServerStartupWaitTime = -1; for (int i = 0; i < this.maximumAssignmentAttempts; i++) { int versionOfOfflineNode = -1; @@ -1812,16 +1814,18 @@ public class AssignmentManager extends ZooKeeperListener { + region.getRegionNameAsString() + ", but the region might already be opened on " + plan.getDestination() + ".", t); - return; + socketTimeoutException = true; + i--; // reset the retry } LOG.warn("Failed assignment of " + state.getRegion().getRegionNameAsString() + " to " + plan.getDestination() + ", trying to assign " - + (regionAlreadyInTransitionException || serverNotRunningYet + + (regionAlreadyInTransitionException || serverNotRunningYet || socketTimeoutException ? "to the same region server because of " - + "RegionAlreadyInTransitionException/ServerNotRunningYetException;" + + "RegionAlreadyInTransitionException/ServerNotRunningYetException/" + + "SocketTimeoutException;" : "elsewhere instead; ") + "retry=" + i, t); // Clean out plan we failed execute and one that doesn't look like it'll