diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 02ed231..7ccbb96 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -1102,6 +1102,14 @@ public class ServerManager { } /** + * Check whether a server is online based on hostname and port + * @return true if finding a server with matching hostname and port. + */ + public boolean isServerWithSameHostnamePortOnline(final ServerName serverName) { + return findServerWithSameHostnamePortWithLock(serverName) != null; + } + + /** * Check if a server is known to be dead. A server can be online, * or known to be dead, or unknown to this manager (i.e, not online, * not known to be dead either. it is simply not tracked by the diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 97afa49..75f1c47 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionReplicaUtil; -import org.apache.hadoop.hbase.conf.ConfigurationObserver; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RackManager; diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index 163c942..ca407fc 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -21,7 +21,9 @@ package org.apache.hadoop.hbase.master.handler; import java.io.IOException; import java.io.InterruptedIOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.locks.Lock; @@ -307,13 +309,33 @@ public class ServerShutdownHandler extends EventHandler { } } + // Determine what type of assignment to do if the dead server already + // restarted. + boolean retainAssignment = + server.getConfiguration().getBoolean("hbase.master.retain.assignment", true); + try { - am.assign(toAssignRegions); + if (retainAssignment && serverManager.isServerWithSameHostnamePortOnline(serverName)) { + Map toAssignRegionsMap = + new HashMap(toAssignRegions.size()); + for (HRegionInfo hri: toAssignRegions) { + toAssignRegionsMap.put(hri, serverName); + } + LOG.info("Best effort in SSH to retain assignment of " + toAssignRegions.size() + + " regions from the dead server " + serverName); + am.assign(toAssignRegionsMap); + } else { + LOG.info("Using round robin in SSH to assign " + toAssignRegions.size() + + " regions from the dead server " + serverName); + retainAssignment = false; + am.assign(toAssignRegions); + } } catch (InterruptedException ie) { - LOG.error("Caught " + ie + " during round-robin assignment"); + LOG.error("Caught " + ie + " during " + (retainAssignment ? "retaining" : "round-robin") + + "assignment"); throw (InterruptedIOException)new InterruptedIOException().initCause(ie); } catch (IOException ioe) { - LOG.info("Caught " + ioe + " during region assignment, will retry"); + LOG.warn("Caught " + ioe + " during region assignment, will retry"); // Only do wal splitting if shouldSplitWal and in DLR mode serverManager.processDeadServer(serverName, this.shouldSplitWal && distributedLogReplay); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java index d585756..7bd3444 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java @@ -112,6 +112,8 @@ public class TestAssignmentManager { private static final HBaseTestingUtility HTU = new HBaseTestingUtility(); private static final ServerName SERVERNAME_A = ServerName.valueOf("example.org", 1234, 5678); + private static final ServerName SERVERNAME_AA = + ServerName.valueOf("example.org", 1234, 9999); private static final ServerName SERVERNAME_B = ServerName.valueOf("example.org", 0, 5678); private static final HRegionInfo REGIONINFO = @@ -487,6 +489,33 @@ public class TestAssignmentManager { } /** + * Run a simple server shutdown handler after the same server restarts. + * @throws KeeperException + * @throws IOException + */ + @Test (timeout=180000) + public void testShutdownHandlerWithRestartedServer() + throws KeeperException, IOException, CoordinatedStateException, ServiceException { + // Create and startup an executor. This is used by AssignmentManager + // handling zk callbacks. + ExecutorService executor = startupMasterExecutor("testShutdownHandlerWithRestartedServer"); + + // Create an AM. + AssignmentManagerWithExtrasForTesting am = + setUpMockedAssignmentManager(this.server, this.serverManager); + am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_A); + am.getTableStateManager().setTableState(REGIONINFO.getTable(), Table.State.ENABLED); + try { + processServerShutdownHandler(am, false, true); + } finally { + executor.shutdown(); + am.shutdown(); + // Clean up all znodes + ZKAssign.deleteAllNodes(this.watcher); + } + } + + /** * To test closed region handler to remove rit and delete corresponding znode * if region in pending close or closing while processing shutdown of a region * server.(HBASE-5927). @@ -621,6 +650,12 @@ public class TestAssignmentManager { private void processServerShutdownHandler(AssignmentManager am, boolean splitRegion) throws IOException, ServiceException { + processServerShutdownHandler(am, splitRegion, false); + } + + private void processServerShutdownHandler( + AssignmentManager am, boolean splitRegion, boolean deadserverRestarted) + throws IOException, ServiceException { // Make sure our new AM gets callbacks; once registered, can't unregister. // Thats ok because we make a new zk watcher for each test. this.watcher.registerListenerFirst(am); @@ -676,6 +711,25 @@ public class TestAssignmentManager { // Have it that SERVERNAME_A died. DeadServer deadServers = new DeadServer(); deadServers.add(SERVERNAME_A); + Mockito.when(this.serverManager.isServerReachable(SERVERNAME_B)).thenReturn(true); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false); + final Map onlineServers = new HashMap(); + onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD); + if (deadserverRestarted) { + // Now make the same server (same host name and port) online again with a different + // start code. + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_AA)).thenReturn(true); + Mockito.when(this.serverManager.isServerReachable(SERVERNAME_AA)).thenReturn(true); + Mockito.when( + this.serverManager.isServerWithSameHostnamePortOnline(SERVERNAME_A)).thenReturn(true); + onlineServers.put(SERVERNAME_AA, ServerLoad.EMPTY_SERVERLOAD); + } + Mockito.when(this.serverManager.getOnlineServersList()).thenReturn( + new ArrayList(onlineServers.keySet())); + Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers); + List avServers = new ArrayList(); + avServers.addAll(onlineServers.keySet()); + Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers); // I need a services instance that will return the AM MasterFileSystem fs = Mockito.mock(MasterFileSystem.class); Mockito.doNothing().when(fs).setLogRecoveryMode(); @@ -1383,6 +1437,14 @@ public class TestAssignmentManager { } @Override + public void assign(Map regionServerMap) + throws IOException, InterruptedException { + assignInvoked = (regionServerMap != null && regionServerMap.size() > 0); + super.assign(regionServerMap); + this.gate.set(true); + } + + @Override public void assign(List regions) throws IOException, InterruptedException { assignInvoked = (regions != null && regions.size() > 0);