Index: core/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- core/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 943644) +++ core/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -600,8 +600,10 @@ ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance, sessionID, password); zk.close(); + final long sleep = sessionTimeout * 5L; + LOG.info("ZK Closed; sleeping=" + sleep); - Thread.sleep(sessionTimeout * 5L); + Thread.sleep(sleep); new HTable(conf, HConstants.META_TABLE_NAME); } Index: core/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java =================================================================== --- core/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java (revision 943644) +++ core/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java (working copy) @@ -19,6 +19,8 @@ */ package org.apache.hadoop.hbase; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; @@ -31,18 +33,19 @@ import java.io.IOException; public class TestZooKeeper { + private static final Log LOG = LogFactory.getLog(TestZooKeeper.class); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private Configuration conf; - private MiniHBaseCluster cluster; /** * @throws java.lang.Exception */ @BeforeClass public static void setUpBeforeClass() throws Exception { + TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true); TEST_UTIL.startMiniCluster(1); } @@ -60,7 +63,6 @@ @Before public void setUp() throws Exception { conf = TEST_UTIL.getConfiguration(); - cluster = TEST_UTIL.getHBaseCluster(); } /** @@ -98,6 +100,7 @@ } @Test public void testRegionServerSessionExpired() throws Exception{ + LOG.info("Starting 
testRegionServerSessionExpired"); this.conf.setBoolean("hbase.regionserver.restart.on.zk.expire", true); new HTable(conf, HConstants.META_TABLE_NAME); TEST_UTIL.expireRegionServerSession(0); @@ -105,6 +108,7 @@ } @Test public void testMasterSessionExpired() throws Exception { + LOG.info("Starting testMasterSessionExpired"); new HTable(conf, HConstants.META_TABLE_NAME); TEST_UTIL.expireMasterSession(); testSanity(); @@ -136,7 +140,7 @@ public void testMultipleZK() { try { HTable localMeta = new HTable(conf, HConstants.META_TABLE_NAME); - HBaseConfiguration otherConf = new HBaseConfiguration(conf); + Configuration otherConf = HBaseConfiguration.create(conf); otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); HTable ipMeta = new HTable(conf, HConstants.META_TABLE_NAME); Index: core/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- core/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 943644) +++ core/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.Leases; +import org.apache.hadoop.hbase.Leases.LeaseStillHeldException; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; @@ -170,6 +171,10 @@ // Test for case where we get a region startup message from a regionserver // that has been quickly restarted but whose znode expiration handler has // not yet run, or from a server whose fail we are currently processing. + // Test its host+port combo is present in serverAddressToServerInfo. If it + // is, reject the server and trigger its expiration. The next time it comes + // in, it should have been removed from serverAddressToServerInfo and queued + // for processing by ProcessServerShutdown. 
HServerInfo info = new HServerInfo(serverInfo); String hostAndPort = info.getServerAddress().toString(); HServerInfo existingServer = @@ -183,20 +188,25 @@ } throw new Leases.LeaseStillHeldException(hostAndPort); } - if (isDead(hostAndPort, true)) { - LOG.debug("Server start rejected; currently processing " + hostAndPort + - " failure"); - throw new Leases.LeaseStillHeldException(hostAndPort); - } - if (isDead(hostAndPort, true)) { - LOG.debug("Server start rejected; currently processing " + hostAndPort + - " failure"); - throw new Leases.LeaseStillHeldException(hostAndPort); - } + checkIsDead(info.getServerName(), "STARTUP"); LOG.info("Received start message from: " + info.getServerName()); recordNewServer(info); } + /* + * If this server is on the dead list, reject it with a LeaseStillHeldException + * @param serverName Server name formatted as host_port_startcode. + * @param what STARTUP or REPORT (label used in the debug log message) + * @throws LeaseStillHeldException + */ + private void checkIsDead(final String serverName, final String what) + throws LeaseStillHeldException { + if (!isDead(serverName)) return; + LOG.debug("Server " + what + " rejected; currently processing " + + serverName + " as dead server"); + throw new Leases.LeaseStillHeldException(serverName); + } + /** * Adds the HSI to the RS list and creates an empty load * @param info The region server informations @@ -249,11 +259,7 @@ final HMsg msgs[], final HRegionInfo[] mostLoadedRegions) throws IOException { HServerInfo info = new HServerInfo(serverInfo); - if (isDead(info.getServerName())) { - LOG.info("Received report from region server " + info.getServerName() + - " previously marked dead. 
Rejecting report."); - throw new Leases.LeaseStillHeldException(info.getServerName()); - } + checkIsDead(info.getServerName(), "REPORT"); if (msgs.length > 0) { if (msgs[0].isType(HMsg.Type.MSG_REPORT_EXITING)) { processRegionServerExit(info, msgs); @@ -665,7 +671,7 @@ this.master.getRegionManager().offlineMetaServer(info.getServerAddress()); //HBASE-1928: Check whether this server has been transitioning the ROOT table - if (this.master.getRegionManager().isRootServerCandidate (serverName)) { + if (this.master.getRegionManager().isRootInTransitionOnThisServer(serverName)) { this.master.getRegionManager().unsetRootRegion(); this.master.getRegionManager().reassignRootRegion(); } Index: core/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java =================================================================== --- core/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (revision 943644) +++ core/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (working copy) @@ -45,6 +45,8 @@ * serving, and the regions need to get reassigned. */ class ProcessServerShutdown extends RegionServerOperation { + // Server name made of the concatenation of hostname, port and startcode + // formatted as <hostname> ',' <port> ',' <startcode> private final String deadServer; private boolean isRootServer; private List metaRegions; @@ -86,7 +88,7 @@ private void closeMetaRegions() { this.isRootServer = this.master.getRegionManager().isRootServer(this.deadServerAddress) || - this.master.getRegionManager().isRootServerCandidate (deadServer); + this.master.getRegionManager().isRootInTransitionOnThisServer(deadServer); if (this.isRootServer) { this.master.getRegionManager().unsetRootRegion(); } @@ -156,7 +158,7 @@ // Check server name. 
If null, skip (We used to consider it was on // shutdown server but that would mean that we'd reassign regions that // were already out being assigned, ones that were product of a split - // that happened while the shutdown was being processed. + // that happened while the shutdown was being processed). String serverAddress = BaseScanner.getServerAddress(values); long startCode = BaseScanner.getStartCode(values); @@ -174,7 +176,6 @@ Bytes.toString(row)); } -// HRegionInfo info = master.getHRegionInfo(row, values.rowResult()); HRegionInfo info = master.getHRegionInfo(row, values); if (info == null) { emptyRows.add(row); @@ -248,7 +249,7 @@ public Boolean call() throws IOException { if (LOG.isDebugEnabled()) { - LOG.debug("process server shutdown scanning root region on " + + LOG.debug("Process server shutdown scanning root region on " + master.getRegionManager().getRootRegionLocation().getBindAddress()); } Scan scan = new Scan(); @@ -282,11 +283,9 @@ @Override protected boolean process() throws IOException { - LOG.info("process shutdown of server " + this.deadServer + - ": logSplit: " + - logSplit + ", rootRescanned: " + rootRescanned + - ", numberOfMetaRegions: " + - master.getRegionManager().numMetaRegions() + + LOG.info("Process shutdown of server " + this.deadServer + + ": logSplit: " + logSplit + ", rootRescanned: " + rootRescanned + + ", numberOfMetaRegions: " + master.getRegionManager().numMetaRegions() + ", onlineMetaRegions.size(): " + master.getRegionManager().numOnlineMetaRegions()); if (!logSplit) { @@ -305,13 +304,10 @@ } logSplit = true; } - LOG.info("Log split complete, meta reassignment and scanning:"); - if (this.isRootServer) { LOG.info("ProcessServerShutdown reassigning ROOT region"); master.getRegionManager().reassignRootRegion(); - isRootServer = false; // prevent double reassignment... heh. 
} @@ -341,12 +337,13 @@ } if (LOG.isDebugEnabled()) { - LOG.debug("process server shutdown scanning root region on " + + LOG.debug("Process server shutdown scanning root region on " + master.getRegionManager().getRootRegionLocation().getBindAddress() + " finished " + Thread.currentThread().getName()); } rootRescanned = true; } + if (!metaTableAvailable()) { // We can't proceed because not all meta regions are online. // metaAvailable() has put this request on the delayedToDoQueue @@ -367,12 +364,10 @@ } closeRegionsInTransition(); - - // Remove this server from dead servers list. Finished splitting logs. this.master.getServerManager().removeDeadServer(deadServer); if (LOG.isDebugEnabled()) { LOG.debug("Removed " + deadServer + " from deadservers Map"); - } + } return true; } Index: core/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java =================================================================== --- core/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (revision 943644) +++ core/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (working copy) @@ -857,7 +857,7 @@ * @param server Server * @return true if server is transitioning the ROOT table */ - public boolean isRootServerCandidate(final String server) { + public boolean isRootInTransitionOnThisServer(final String server) { synchronized (this.regionsInTransition) { for (RegionState s : regionsInTransition.values()) { if (s.getRegionInfo().isRootRegion()