Index: src/test/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- src/test/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 942003) +++ src/test/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -600,7 +600,6 @@ final boolean hdfsShutdown) throws Exception { HRegionServer rs = hbaseCluster.getRegionServer(index); - if (!hdfsShutdown) rs.setHDFSShutdownThreadOnExit(null); expireSession(rs.getZooKeeperWrapper()); } Index: src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java =================================================================== --- src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java (revision 942003) +++ src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java (working copy) @@ -28,20 +28,33 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; -import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.io.MapWritable; +import org.apache.hadoop.security.UnixUserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation; /** * This class creates a single process HBase cluster. One thread is created for - * each server. + * each server. The master uses the 'default' FileSystem. The RegionServers, + * if we are running on DistributedFileSystem, create a FileSystem instance + * each and will close down their instance on the way out. */ public class MiniHBaseCluster implements HConstants { static final Log LOG = LogFactory.getLog(MiniHBaseCluster.class.getName()); + // Cache this. For some reason only works first time I get it. TODO: Figure + // out why. + private final static UserGroupInformation UGI; + static { + UGI = UserGroupInformation.getCurrentUGI(); + } + private HBaseConfiguration conf; public LocalHBaseCluster hbaseCluster; @@ -107,15 +120,77 @@ } /** - * Subclass so can get at protected methods (none at moment). + * Subclass so can get at protected methods (none at moment). Also, creates + * a FileSystem instance per instantiation and adds a shutdown hook that + * closes its own FileSystem on the way out: only its own FileSystem is shut + * down, not all filesystems as the FileSystem exit hook does. */ public static class MiniHBaseClusterRegionServer extends HRegionServer { + private static int index = 0; + public MiniHBaseClusterRegionServer(HBaseConfiguration conf) throws IOException { - super(conf); + super(setDifferentUser(conf)); } + + /* + * @param c Configuration to copy. + * @return A copy of c carrying a unique username so a new FileSystem + * instance is created for this daemon if we are up on DistributedFileSystem; + * otherwise c is returned unaltered. + * @throws IOException + */ + private static HBaseConfiguration setDifferentUser(final HBaseConfiguration c) + throws IOException { + FileSystem currentfs = FileSystem.get(c); + if (!(currentfs instanceof DistributedFileSystem)) return c; + // Else distributed filesystem. Make a new instance per daemon. Below + // code is taken from the AppendTestUtil over in hdfs. + HBaseConfiguration c2 = new HBaseConfiguration(c); + String username = UGI.getUserName() + ".hrs." 
+ index++; + UnixUserGroupInformation.saveToConf(c2, + UnixUserGroupInformation.UGI_PROPERTY_NAME, + new UnixUserGroupInformation(username, new String[]{"supergroup"})); + return c2; + } + + @Override + protected void init(MapWritable c) throws IOException { + super.init(c); + // Change shutdown hook to only shutdown the FileSystem added above by + // {@link #getFileSystem(HBaseConfiguration) + if (getFileSystem() instanceof DistributedFileSystem) { + Thread t = new SingleFileSystemShutdownThread(getFileSystem()); + this.setHDFSShutdownThreadOnExit(t); + } + } + + public void kill() { + super.kill(); + } } + /** + * Alternate shutdown hook. + * Just shuts down the passed fs, not all as default filesystem hook does. + */ + static class SingleFileSystemShutdownThread extends Thread { + private final FileSystem fs; + SingleFileSystemShutdownThread(final FileSystem fs) { + super("Shutdown of " + fs); + this.fs = fs; + } + @Override + public void run() { + try { + LOG.info("Hook closing fs=" + this.fs); + this.fs.close(); + } catch (IOException e) { + LOG.warn("Running hook", e); + } + } + } + private void init(final int nRegionNodes) throws IOException { try { // start up a LocalHBaseCluster @@ -175,8 +250,6 @@ */ public String abortRegionServer(int serverNumber) { HRegionServer server = getRegionServer(serverNumber); - // Don't run hdfs shutdown thread. - server.setHDFSShutdownThreadOnExit(null); LOG.info("Aborting " + server.toString()); server.abort(); return server.toString(); @@ -207,10 +280,6 @@ JVMClusterUtil.RegionServerThread server = hbaseCluster.getRegionServers().get(serverNumber); LOG.info("Stopping " + server.toString()); - if (!shutdownFS) { - // Stop the running of the hdfs shutdown thread in tests. - server.getRegionServer().setHDFSShutdownThreadOnExit(null); - } server.getRegionServer().stop(); return server; } Index: src/test/org/apache/hadoop/hbase/master/TestServerManager.java =================================================================== --- src/test/org/apache/hadoop/hbase/master/TestServerManager.java (revision 0) +++ src/test/org/apache/hadoop/hbase/master/TestServerManager.java (revision 0) @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master; + +import static org.junit.Assert.*; + +import java.util.HashSet; +import java.util.Set; + +import org.junit.Test; + + +public class TestServerManager { + @Test public void testIsDead() { + Set deadServers = new HashSet(); + final String hostname123 = "one,2,3"; + assertFalse(ServerManager.isDead(deadServers, hostname123, false)); + assertFalse(ServerManager.isDead(deadServers, hostname123, true)); + deadServers.add(hostname123); + assertTrue(ServerManager.isDead(deadServers, hostname123, false)); + assertTrue(ServerManager.isDead(deadServers, "one:2", true)); + } +} \ No newline at end of file Index: src/test/org/apache/hadoop/hbase/master/TestMasterTransistions.java =================================================================== --- src/test/org/apache/hadoop/hbase/master/TestMasterTransistions.java (revision 942003) +++ src/test/org/apache/hadoop/hbase/master/TestMasterTransistions.java (working copy) @@ -19,15 +19,19 @@ */ package org.apache.hadoop.hbase.master; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.net.BindException; import java.util.Collection; import java.util.Set; import java.util.concurrent.CopyOnWriteArraySet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HMsg; @@ -68,6 +72,7 @@ * @throws Exception */ @BeforeClass public static void beforeAllTests() throws Exception { + TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true); // Start a cluster of two regionservers. TEST_UTIL.startMiniCluster(2); // Create a table of three families. This will assign a region. @@ -83,151 +88,6 @@ } /** - * HBase2482 is about outstanding region openings. If any are outstanding - * when a regionserver goes down, then they'll never deploy. They'll be - * stuck in the regions-in-transition list for ever. This listener looks - * for a region opening HMsg and if its from the server passed on construction, - * then we kill it. It also looks out for a close message on the victim - * server because that signifies start of the fireworks. - */ - static class HBase2482Listener implements RegionServerOperationListener { - private final HRegionServer victim; - private boolean abortSent = false; - // We closed regions on new server. - private volatile boolean closed = false; - // Copy of regions on new server - private final Collection copyOfOnlineRegions; - // This is the region that was in transition on the server we aborted. Test - // passes if this region comes back online successfully. - private HRegionInfo regionToFind; - - HBase2482Listener(final HRegionServer victim) { - this.victim = victim; - // Copy regions currently open on this server so I can notice when - // there is a close. - this.copyOfOnlineRegions = - this.victim.getCopyOfOnlineRegionsSortedBySize().values(); - } - - @Override - public boolean process(HServerInfo serverInfo, HMsg incomingMsg) { - if (!victim.getServerInfo().equals(serverInfo) || - this.abortSent || !this.closed) { - return true; - } - if (!incomingMsg.isType(HMsg.Type.MSG_REPORT_PROCESS_OPEN)) return true; - // Save the region that is in transition so can test later it came back. 
- this.regionToFind = incomingMsg.getRegionInfo(); - LOG.info("ABORTING " + this.victim + " because got a " + - HMsg.Type.MSG_REPORT_PROCESS_OPEN + " on this server for " + - incomingMsg.getRegionInfo().getRegionNameAsString()); - this.victim.setHDFSShutdownThreadOnExit(null); - this.victim.abort(); - this.abortSent = true; - return true; - } - - @Override - public boolean process(RegionServerOperation op) throws IOException { - return true; - } - - @Override - public void processed(RegionServerOperation op) { - if (this.closed || !(op instanceof ProcessRegionClose)) return; - ProcessRegionClose close = (ProcessRegionClose)op; - for (HRegion r: this.copyOfOnlineRegions) { - if (r.getRegionInfo().equals(close.regionInfo)) { - // We've closed one of the regions that was on the victim server. - // Now can start testing for when all regions are back online again - LOG.info("Found close of " + - r.getRegionInfo().getRegionNameAsString() + - "; setting close happened flag"); - this.closed = true; - break; - } - } - } - } - - /** - * In 2482, a RS with an opening region on it dies. The said region is then - * stuck in the master's regions-in-transition and never leaves it. This - * test works by bringing up a new regionserver, waiting for the load - * balancer to give it some regions. Then, we close all on the new server. - * After sending all the close messages, we send the new regionserver the - * special blocking message so it can not process any more messages. - * Meantime reopening of the just-closed regions is backed up on the new - * server. Soon as master gets an opening region from the new regionserver, - * we kill it. We then wait on all regions to combe back on line. If bug - * is fixed, this should happen soon as the processing of the killed server is - * done. - * @see HBASE-2482 - */ - @Test public void testKillRSWithOpeningRegion2482() throws Exception { - MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); - // Count how many regions are online. They need to be all back online for - // this test to succeed. - int countOfMetaRegions = countOfMetaRegions(); - // Add a listener on the server. - HMaster m = cluster.getMaster(); - // Start new regionserver. - MiniHBaseClusterRegionServer hrs = - (MiniHBaseClusterRegionServer)cluster.startRegionServer().getRegionServer(); - LOG.info("Started new regionserver: " + hrs.toString()); - // Wait until has some regions before proceeding. Balancer will give it some. - int minimumRegions = - countOfMetaRegions/(cluster.getRegionServerThreads().size() * 2); - while (hrs.getOnlineRegions().size() < minimumRegions) Threads.sleep(100); - // Set the listener only after some regions have been opened on new server. - HBase2482Listener listener = new HBase2482Listener(hrs); - m.getRegionServerOperationQueue(). - registerRegionServerOperationListener(listener); - try { - // Go close all non-catalog regions on this new server - closeAlltNonCatalogRegions(cluster, hrs); - // After all closes, add blocking message before the region opens start to - // come in. - cluster.addMessageToSendRegionServer(hrs, - new HMsg(HMsg.Type.TESTING_MSG_BLOCK_RS)); - // Wait till one of the above close messages has an effect before we start - // wait on all regions back online. - while (!listener.closed) Threads.sleep(100); - LOG.info("Past close"); - // Make sure the abort server message was sent. - while(!listener.abortSent) Threads.sleep(100); - LOG.info("Past abort send; waiting on all regions to redeploy"); - // Now wait for regions to come back online. 
- assertRegionIsBackOnline(listener.regionToFind); - } finally { - m.getRegionServerOperationQueue(). - unregisterRegionServerOperationListener(listener); - } - } - - - /* - * @param cluster - * @param hrs - * @return Count of regions closed. - * @throws IOException - */ - private int closeAlltNonCatalogRegions(final MiniHBaseCluster cluster, - final MiniHBaseCluster.MiniHBaseClusterRegionServer hrs) - throws IOException { - int countOfRegions = 0; - for (HRegion r: hrs.getOnlineRegions()) { - if (r.getRegionInfo().isMetaRegion()) continue; - cluster.addMessageToSendRegionServer(hrs, - new HMsg(HMsg.Type.MSG_REGION_CLOSE, r.getRegionInfo())); - LOG.info("Sent close of " + r.getRegionInfo().getRegionNameAsString() + - " on " + hrs.toString()); - countOfRegions++; - } - return countOfRegions; - } - - /** * Listener for regionserver events testing hbase-2428 (Infinite loop of * region closes if META region is offline). In particular, listen * for the close of the 'metaServer' and when it comes in, requeue it with a @@ -335,7 +195,8 @@ * in. * @see HBASE-2428 */ - @Test public void testRegionCloseWhenNoMetaHBase2428() throws Exception { + @Test (timeout=300000) public void testRegionCloseWhenNoMetaHBase2428() throws Exception { + LOG.info("Running testRegionCloseWhenNoMetaHBase2428"); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); final HMaster master = cluster.getMaster(); int metaIndex = cluster.getServerWithMeta(); @@ -359,7 +220,7 @@ master.getRegionServerOperationQueue(). registerRegionServerOperationListener(listener); try { - // Now close the server carrying index. + // Now close the server carrying meta. cluster.abortRegionServer(metaIndex); // First wait on receipt of meta server shutdown message. @@ -380,6 +241,196 @@ } } + /** + * Test adding in a new server before old one on same host+port is dead. + * Make the test more onerous by having the server under test carry the meta. + * If confusion between old and new, purportedly meta never comes back. Test + * that meta gets redeployed. + */ + @Test (timeout=300000) public void testAddingServerBeforeOldIsDead2413() throws IOException { + LOG.info("Running testAddingServerBeforeOldIsDead2413"); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + int count = count(); + int metaIndex = cluster.getServerWithMeta(); + MiniHBaseClusterRegionServer metaHRS = + (MiniHBaseClusterRegionServer)cluster.getRegionServer(metaIndex); + int port = metaHRS.getServerInfo().getServerAddress().getPort(); + Configuration c = TEST_UTIL.getConfiguration(); + String oldPort = c.get(HConstants.REGIONSERVER_PORT, "0"); + try { + LOG.info("KILLED=" + metaHRS); + metaHRS.kill(); + c.set(HConstants.REGIONSERVER_PORT, Integer.toString(port)); + // Try and start new regionserver. It might clash with the old + // regionserver port so keep trying to get past the BindException. + HRegionServer hrs = null; + while (true) { + try { + hrs = cluster.startRegionServer().getRegionServer(); + break; + } catch (IOException e) { + if (e.getCause() != null && e.getCause() instanceof InvocationTargetException) { + InvocationTargetException ee = (InvocationTargetException)e.getCause(); + if (ee.getCause() != null && ee.getCause() instanceof BindException) { + LOG.info("BindException; retrying: " + e.toString()); + } + } + } + } + LOG.info("STARTED=" + hrs); + // Wait until he's been given at least 3 regions before we go on to try + // and count rows in table. 
+ while (hrs.getOnlineRegions().size() < 3) Threads.sleep(100); + LOG.info(hrs.toString() + " has " + hrs.getOnlineRegions().size() + + " regions"); + assertEquals(count, count()); + } finally { + c.set(HConstants.REGIONSERVER_PORT, oldPort); + } + } + + + /** + * HBase2482 is about outstanding region openings. If any are outstanding + * when a regionserver goes down, then they'll never deploy. They'll be + * stuck in the regions-in-transition list for ever. This listener looks + * for a region opening HMsg and if its from the server passed on construction, + * then we kill it. It also looks out for a close message on the victim + * server because that signifies start of the fireworks. + */ + static class HBase2482Listener implements RegionServerOperationListener { + private final HRegionServer victim; + private boolean abortSent = false; + // We closed regions on new server. + private volatile boolean closed = false; + // Copy of regions on new server + private final Collection copyOfOnlineRegions; + // This is the region that was in transition on the server we aborted. Test + // passes if this region comes back online successfully. + private HRegionInfo regionToFind; + + HBase2482Listener(final HRegionServer victim) { + this.victim = victim; + // Copy regions currently open on this server so I can notice when + // there is a close. + this.copyOfOnlineRegions = + this.victim.getCopyOfOnlineRegionsSortedBySize().values(); + } + + @Override + public boolean process(HServerInfo serverInfo, HMsg incomingMsg) { + if (!victim.getServerInfo().equals(serverInfo) || + this.abortSent || !this.closed) { + return true; + } + if (!incomingMsg.isType(HMsg.Type.MSG_REPORT_PROCESS_OPEN)) return true; + // Save the region that is in transition so can test later it came back. + this.regionToFind = incomingMsg.getRegionInfo(); + LOG.info("ABORTING " + this.victim + " because got a " + + HMsg.Type.MSG_REPORT_PROCESS_OPEN + " on this server for " + + incomingMsg.getRegionInfo().getRegionNameAsString()); + this.victim.abort(); + this.abortSent = true; + return true; + } + + @Override + public boolean process(RegionServerOperation op) throws IOException { + return true; + } + + @Override + public void processed(RegionServerOperation op) { + if (this.closed || !(op instanceof ProcessRegionClose)) return; + ProcessRegionClose close = (ProcessRegionClose)op; + for (HRegion r: this.copyOfOnlineRegions) { + if (r.getRegionInfo().equals(close.regionInfo)) { + // We've closed one of the regions that was on the victim server. + // Now can start testing for when all regions are back online again + LOG.info("Found close of " + + r.getRegionInfo().getRegionNameAsString() + + "; setting close happened flag"); + this.closed = true; + break; + } + } + } + } + + /** + * In 2482, a RS with an opening region on it dies. The said region is then + * stuck in the master's regions-in-transition and never leaves it. This + * test works by bringing up a new regionserver, waiting for the load + * balancer to give it some regions. Then, we close all on the new server. + * After sending all the close messages, we send the new regionserver the + * special blocking message so it can not process any more messages. + * Meantime reopening of the just-closed regions is backed up on the new + * server. Soon as master gets an opening region from the new regionserver, + * we kill it. We then wait on all regions to come back on line. If bug + * is fixed, this should happen soon as the processing of the killed server is + * done. 
+ * @see HBASE-2482 + */ + @Test (timeout=300000) public void testKillRSWithOpeningRegion2482() throws Exception { + LOG.info("Running testKillRSWithOpeningRegion2482"); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + // Count how many regions are online. They need to be all back online for + // this test to succeed. + int countOfMetaRegions = countOfMetaRegions(); + // Add a listener on the server. + HMaster m = cluster.getMaster(); + // Start new regionserver. + MiniHBaseClusterRegionServer hrs = + (MiniHBaseClusterRegionServer)cluster.startRegionServer().getRegionServer(); + LOG.info("Started new regionserver: " + hrs.toString()); + // Wait until has some regions before proceeding. Balancer will give it some. + int minimumRegions = + countOfMetaRegions/(cluster.getRegionServerThreads().size() * 2); + while (hrs.getOnlineRegions().size() < minimumRegions) Threads.sleep(100); + // Set the listener only after some regions have been opened on new server. + HBase2482Listener listener = new HBase2482Listener(hrs); + m.getRegionServerOperationQueue(). + registerRegionServerOperationListener(listener); + try { + // Go close all non-catalog regions on this new server + closeAllNonCatalogRegions(cluster, hrs); + // After all closes, add blocking message before the region opens start to + // come in. + cluster.addMessageToSendRegionServer(hrs, + new HMsg(HMsg.Type.TESTING_MSG_BLOCK_RS)); + // Wait till one of the above close messages has an effect before we start + // wait on all regions back online. + while (!listener.closed) Threads.sleep(100); + LOG.info("Past close"); + // Make sure the abort server message was sent. + while(!listener.abortSent) Threads.sleep(100); + LOG.info("Past abort send; waiting on all regions to redeploy"); + // Now wait for regions to come back online. + assertRegionIsBackOnline(listener.regionToFind); + } finally { + m.getRegionServerOperationQueue(). + unregisterRegionServerOperationListener(listener); + } + } + + /* + * @return Count of all non-catalog regions on the designated server + */ + private int closeAllNonCatalogRegions(final MiniHBaseCluster cluster, + final MiniHBaseCluster.MiniHBaseClusterRegionServer hrs) + throws IOException { + int countOfRegions = 0; + for (HRegion r: hrs.getOnlineRegions()) { + if (r.getRegionInfo().isMetaRegion()) continue; + cluster.addMessageToSendRegionServer(hrs, + new HMsg(HMsg.Type.MSG_REGION_CLOSE, r.getRegionInfo())); + LOG.info("Sent close of " + r.getRegionInfo().getRegionNameAsString() + + " on " + hrs.toString()); + countOfRegions++; + } + return countOfRegions; + } + private void assertRegionIsBackOnline(final HRegionInfo hri) throws IOException { // Region should have an entry in its startkey because of addRowToEachRegion. @@ -474,6 +525,23 @@ } /* + * @return Count of rows in TABLENAME + * @throws IOException + */ + private static int count() throws IOException { + HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + int rows = 0; + Scan scan = new Scan(); + ResultScanner s = t.getScanner(scan); + for (Result r = null; (r = s.next()) != null;) { + rows++; + } + s.close(); + LOG.info("Counted=" + rows); + return rows; + } + + /* * @param hri * @return Start key for hri (If start key is '', then return 'aaa'. 
*/ Index: src/java/org/apache/hadoop/hbase/ClusterStatus.java =================================================================== --- src/java/org/apache/hadoop/hbase/ClusterStatus.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/ClusterStatus.java (working copy) @@ -68,7 +68,7 @@ public Collection getServerNames() { ArrayList names = new ArrayList(liveServerInfo.size()); for (HServerInfo server: liveServerInfo) { - names.add(server.getName()); + names.add(server.getHostName()); } return names; } Index: src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -135,6 +135,8 @@ // debugging and unit tests. protected volatile boolean abortRequested; + private volatile boolean killed = false; + // If false, the file system has become unavailable protected volatile boolean fsOk; @@ -630,7 +632,9 @@ hlogRoller.interruptIfNecessary(); this.majorCompactionChecker.interrupt(); - if (abortRequested) { + if (killed) { + // Just skip out w/o closing regions. + } else if (abortRequested) { if (this.fsOk) { // Only try to clean up if the file system is available try { @@ -682,17 +686,27 @@ HBaseRPC.stopProxy(this.hbaseMaster); this.hbaseMaster = null; } - - join(); - this.zooKeeperWrapper.close(); - if (this.shutdownHDFS.get()) { - runThread(this.hdfsShutdownThread, - this.conf.getLong("hbase.dfs.shutdown.wait", 30000)); + + if (!killed) { + join(); + this.zooKeeperWrapper.close(); } - + runHDFSShutdownHook(this.shutdownHDFS.get(), this.hdfsShutdownThread, + this.conf.getLong("hbase.dfs.shutdown.wait", 30000)); LOG.info(Thread.currentThread().getName() + " exiting"); } + /** + * @param runit Run the passed thread? + * @param t Thread to run, if not null. + * @param waittime Time to wait on thread completion. + */ + protected void runHDFSShutdownHook(final boolean runit, final Thread t, + final long waittime) { + if (!runit || t == null) return; + runThread(t, waittime); + } + /* * Add to the passed msgs messages to pass to the master. * @param msgs Current outboundMsgs array; we'll add messages to this List. @@ -742,12 +756,13 @@ /** * Set the hdfs shutdown thread to run on exit. Pass null to disable - * running of the shutdown test. Needed by tests. + * running of the shutdown test. * @param t Thread to run. Pass null to disable tests. - * @return Previous occupant of the shutdown thread position. + * @return Previous occupant of the shutdown thread. */ - public Thread setHDFSShutdownThreadOnExit(final Thread t) { + protected Thread setHDFSShutdownThreadOnExit(final Thread t) { if (t == null) this.shutdownHDFS.set(false); + this.shutdownHDFS.set(true); Thread old = this.hdfsShutdownThread; this.hdfsShutdownThread = t; return old; @@ -794,7 +809,8 @@ // Register shutdown hook for HRegionServer, runs an orderly shutdown // when a kill signal is recieved Runtime.getRuntime().addShutdownHook(new ShutdownThread(this, - Thread.currentThread())); + Thread.currentThread())); + // Suppress the hdfs shutdown hook and run it ourselves below. this.hdfsShutdownThread = suppressHdfsShutdownHook(); this.rootDir = new Path(this.conf.get(HConstants.HBASE_DIR)); @@ -1000,21 +1016,25 @@ } /** - * So, HDFS caches FileSystems so when you call FileSystem.get it's fast. 
In - * order to make sure things are cleaned up, it also creates a shutdown hook - * so that all filesystems can be closed when the process is terminated. This - * conveniently runs concurrently with our own shutdown handler, and - * therefore causes all the filesystems to be closed before the server can do - * all its necessary cleanup. + * So, when you call {@ink FileSystem#get(Configuration)}, a new instance + * is created if none exists for this user currently and it is added to + * a static Cache. In order to make sure things are cleaned up on the way out, + * it also adds a shutdown hook so that ALL filesystem instances in a JVM + * are closed when the process is terminated (The hook, if it runs, + * calls FileSystem.closeAll() which will call close on all FileSystems in the + * cache). This inconveniently runs concurrently with our own shutdown handler, + * and therefore causes all the filesystems to be closed before the server can + * do all its necessary cleanup. * - * The crazy dirty reflection in this method sneaks into the FileSystem cache + *
<p>The crazy dirty reflection in this method sneaks into the FileSystem cache * and grabs the shutdown hook, removes it from the list of active shutdown * hooks, and hangs onto it until later. Then, after we're properly done with * our graceful shutdown, we can execute the hdfs hook manually to make sure * loose ends are tied up. * - * This seems quite fragile and susceptible to breaking if Hadoop changes + * <p>
This seems quite fragile and susceptible to breaking if Hadoop changes * anything about the way this cleanup is managed. Keep an eye on things. + * @return Thread The hook registered by FileSystem to run at shutdown time. */ private Thread suppressHdfsShutdownHook() { try { @@ -1276,6 +1296,16 @@ stop(); } + /* + * Simulate a kill -9 of this server. + * Exits w/o closing regions or cleaninup logs but it does close socket in + * case want to bring up server on old hostname+port immediately. + */ + protected void kill() { + this.killed = true; + abort(); + } + /** * Wait on all threads to finish. * Presumption is that all closes and stops have already been called. Index: src/java/org/apache/hadoop/hbase/HServerInfo.java =================================================================== --- src/java/org/apache/hadoop/hbase/HServerInfo.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/HServerInfo.java (working copy) @@ -29,13 +29,11 @@ /** - * HServerInfo contains metainfo about an HRegionServer, Currently it only - * contains the server start code. - * - * In the future it will contain information about the source machine and - * load statistics. + * HServerInfo is meta info about an HRegionServer. + * Holds hostname, ports and load. */ public class HServerInfo implements WritableComparable { + public static final String SEPARATOR = ","; private HServerAddress serverAddress; private long startCode; private HServerLoad load; @@ -44,7 +42,6 @@ private String name; private static Map dnsCache = new HashMap(); - /** default constructor - used by Writable */ public HServerInfo() { this(new HServerAddress(), 0, HConstants.DEFAULT_REGIONSERVER_INFOPORT, "default name"); @@ -66,7 +63,7 @@ } /** - * Construct a new object using another as input (like a copy constructor) + * Copy-constructor * @param other */ public HServerInfo(HServerInfo other) { @@ -74,109 +71,60 @@ this.startCode = other.getStartCode(); this.load = other.getLoad(); this.infoPort = other.getInfoPort(); - this.name = other.getName(); + this.name = other.getHostName(); } - /** - * @return the load - */ public HServerLoad getLoad() { return load; } - /** - * @param load the load to set - */ public void setLoad(HServerLoad load) { this.load = load; } - /** @return the server address */ public synchronized HServerAddress getServerAddress() { return new HServerAddress(serverAddress); } - /** - * Change the server address. - * @param serverAddress New server address - */ public synchronized void setServerAddress(HServerAddress serverAddress) { this.serverAddress = serverAddress; this.serverName = null; } - - /** @return the server start code */ + public synchronized long getStartCode() { return startCode; } - - /** - * @return Port the info server is listening on. 
- */ + public int getInfoPort() { return this.infoPort; } - - /** - * @param infoPort - new port of info server - */ + public void setInfoPort(int infoPort) { this.infoPort = infoPort; } - - /** - * @param startCode the startCode to set - */ + public synchronized void setStartCode(long startCode) { this.startCode = startCode; this.serverName = null; } - - /** - * @return the server name in the form hostname_startcode_port - */ - public synchronized String getServerName() { - if (this.serverName == null) { - // if we have the hostname of the RS, use it - if(this.name != null) { - this.serverName = getServerName(this.name, this.serverAddress.getPort(), this.startCode); - } - // go to DNS name resolution only if we dont have the name of the RS - else { - this.serverName = getServerName(this.serverAddress, this.startCode); - } - } - return this.serverName; - } - - /** - * Get the hostname of the server - * @return hostname - */ - public String getName() { + + public String getHostName() { return name; } - - /** - * Set the hostname of the server - * @param name hostname - */ - public void setName(String name) { + + public void setHostName(String name) { this.name = name; } /** - * @see java.lang.Object#toString() + * @return ServerName and load concatenated. */ @Override public String toString() { - return "address: " + this.serverAddress + ", startcode: " + this.startCode - + ", load: (" + this.load.toString() + ")"; + return "serverName=" + getServerName() + + ", load=(" + this.load.toString() + ")"; } - /** - * @see java.lang.Object#equals(java.lang.Object) - */ @Override public boolean equals(Object obj) { if (this == obj) { @@ -191,9 +139,6 @@ return compareTo((HServerInfo)obj) == 0; } - /** - * @see java.lang.Object#hashCode() - */ @Override public int hashCode() { return this.getServerName().hashCode(); @@ -223,17 +168,27 @@ } /** - * @param info - * @return the server name in the form hostname_startcode_port + * @return Server name as <hostname> ',' <startcode> ',' <port> */ - private static String getServerName(HServerInfo info) { - return getServerName(info.getServerAddress(), info.getStartCode()); + public synchronized String getServerName() { + if (this.serverName == null) { + // if we have the hostname of the RS, use it + if(this.name != null) { + this.serverName = + getServerName(this.name, this.serverAddress.getPort(), this.startCode); + } + // go to DNS name resolution only if we dont have the name of the RS + else { + this.serverName = getServerName(this.serverAddress, this.startCode); + } + } + return this.serverName; } - + /** - * @param serverAddress in the form hostname:port - * @param startCode - * @return the server name in the form hostname_startcode_port + * @param serverAddress In form hostname:port + * @param startCode Server startcode + * @return Server name as <hostname> ',' <startcode> ',' <port> */ public static String getServerName(String serverAddress, long startCode) { String name = null; @@ -256,20 +211,27 @@ } /** - * @param address - * @param startCode - * @return the server name in the form hostname_startcode_port + * @param address Server address + * @param startCode Server startcode + * @return Server name as <hostname> ',' <startcode> ',' <port> */ public static String getServerName(HServerAddress address, long startCode) { return getServerName(address.getHostname(), address.getPort(), startCode); } + /* + * @param hostName + * @param port + * @param startCode + * @return Server name made from hostName, port, and startcode in the form: + * <hostname> 
',' <startcode> ',' <port> + */ private static String getServerName(String hostName, int port, long startCode) { StringBuilder name = new StringBuilder(hostName); - name.append(","); + name.append(SEPARATOR); name.append(port); - name.append(","); + name.append(SEPARATOR); name.append(startCode); return name.toString(); } -} +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -104,7 +104,6 @@ private final int nobalancingCount; class ServerMonitor extends Chore { - ServerMonitor(final int period, final AtomicBoolean stop) { super(period, stop); } @@ -117,18 +116,21 @@ if (numDeadServers > 0) { StringBuilder sb = new StringBuilder("Dead Server ["); boolean first = true; - for (String server: deadServers) { - if (!first) { - sb.append(", "); - first = false; + synchronized (deadServers) { + for (String server: deadServers) { + if (!first) { + sb.append(", "); + first = false; + } + sb.append(server); } - sb.append(server); } sb.append("]"); deadServersList = sb.toString(); } LOG.info(numServers + " region servers, " + numDeadServers + - " dead, average load " + averageLoad + (deadServersList != null? deadServers: "")); + " dead, average load " + averageLoad + + (deadServersList != null? deadServers: "")); } } @@ -151,45 +153,32 @@ /** * Let the server manager know a new regionserver has come online * @param serverInfo - * @throws Leases.LeaseStillHeldException + * @throws IOException */ public void regionServerStartup(final HServerInfo serverInfo) - throws Leases.LeaseStillHeldException { + throws IOException { + // Test for case where we get a region startup message from a regionserver + // that has been quickly restarted but whose znode expiration handler has + // not yet run, or from a server whose fail we are currently processing. HServerInfo info = new HServerInfo(serverInfo); - String serverName = info.getServerName(); - if (serversToServerInfo.containsKey(serverName) || - deadServers.contains(serverName)) { - LOG.debug("Server start was rejected: " + serverInfo); - LOG.debug("serversToServerInfo.containsKey: " + serversToServerInfo.containsKey(serverName)); - LOG.debug("deadServers.contains: " + deadServers.contains(serverName)); - throw new Leases.LeaseStillHeldException(serverName); - } - - LOG.info("Received start message from: " + serverName); - // Go on to process the regionserver registration. - HServerLoad load = serversToLoad.remove(serverName); - if (load != null) { - // The startup message was from a known server. - // Remove stale information about the server's load. 
- synchronized (loadToServers) { - Set servers = loadToServers.get(load); - if (servers != null) { - servers.remove(serverName); - if (servers.size() > 0) - loadToServers.put(load, servers); - else - loadToServers.remove(load); - } + String hostAndPort = info.getServerAddress().toString(); + HServerInfo existingServer = + this.serverAddressToServerInfo.get(info.getServerAddress()); + if (existingServer != null) { + LOG.info("Server start rejected; we already have " + hostAndPort + + " registered; existingServer=" + existingServer + ", newServer=" + info); + if (existingServer.getStartCode() < info.getStartCode()) { + LOG.info("Triggering server recovery; existingServer looks stale"); + expireServer(existingServer); } + throw new Leases.LeaseStillHeldException(hostAndPort); } - HServerInfo storedInfo = serversToServerInfo.remove(serverName); - if (storedInfo != null && !master.closed.get()) { - // The startup message was from a known server with the same name. - // Timeout the old one right away. - master.getRootRegionLocation(); - RegionServerOperation op = new ProcessServerShutdown(master, storedInfo); - this.master.getRegionServerOperationQueue().put(op); + if (isDead(hostAndPort, true)) { + LOG.debug("Server start rejected; currently processing " + hostAndPort + + " failure"); + throw new Leases.LeaseStillHeldException(hostAndPort); } + LOG.info("Received start message from: " + info.getServerName()); recordNewServer(info); } @@ -214,7 +203,7 @@ info.setLoad(load); // We must set this watcher here because it can be set on a fresh start // or on a failover - Watcher watcher = new ServerExpirer(serverName, info.getServerAddress()); + Watcher watcher = new ServerExpirer(new HServerInfo(info)); master.getZooKeeperWrapper().updateRSLocationGetWatch(info, watcher); serversToServerInfo.put(serverName, info); serverAddressToServerInfo.put(info.getServerAddress(), info); @@ -313,7 +302,7 @@ synchronized (serversToServerInfo) { removeServerInfo(info.getServerName(), info.getServerAddress()); - serversToServerInfo.notifyAll(); + notifyServers(); } return new HMsg[] {REGIONSERVER_STOP}; @@ -330,57 +319,47 @@ */ private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) { assert msgs[0].getType() == Type.MSG_REPORT_EXITING; - synchronized (serversToServerInfo) { - try { - // This method removes ROOT/META from the list and marks them to be reassigned - // in addition to other housework. - if (removeServerInfo(serverInfo.getServerName(), - serverInfo.getServerAddress())) { - // Only process the exit message if the server still has registered info. - // Otherwise we could end up processing the server exit twice. - LOG.info("Region server " + serverInfo.getServerName() + - ": MSG_REPORT_EXITING"); - // Get all the regions the server was serving reassigned - // (if we are not shutting down). - if (!master.closed.get()) { - for (int i = 1; i < msgs.length; i++) { - LOG.info("Processing " + msgs[i] + " from " + - serverInfo.getServerName()); - assert msgs[i].getType() == Type.MSG_REGION_CLOSE; - HRegionInfo info = msgs[i].getRegionInfo(); - // Meta/root region offlining is handed in removeServerInfo above. - if (!info.isMetaRegion()) { - synchronized (master.regionManager) { - if (!master.regionManager.isOfflined( - info.getRegionNameAsString())) { - master.regionManager.setUnassigned(info, true); - } else { - master.regionManager.removeRegion(info); - } + // This method removes ROOT/META from the list and marks them to be + // reassigned in addition to other housework. 
+ if (removeServerInfo(serverInfo.getServerName(), serverInfo.getServerAddress())) { + // Only process the exit message if the server still has registered info. + // Otherwise we could end up processing the server exit twice. + LOG.info("Region server " + serverInfo.getServerName() + + ": MSG_REPORT_EXITING"); + // Get all the regions the server was serving reassigned + // (if we are not shutting down). + if (!master.closed.get()) { + for (int i = 1; i < msgs.length; i++) { + LOG.info("Processing " + msgs[i] + " from " + + serverInfo.getServerName()); + assert msgs[i].getType() == Type.MSG_REGION_CLOSE; + HRegionInfo info = msgs[i].getRegionInfo(); + // Meta/root region offlining is handed in removeServerInfo above. + if (!info.isMetaRegion()) { + synchronized (master.regionManager) { + if (!master.regionManager.isOfflined(info.getRegionNameAsString())) { + master.regionManager.setUnassigned(info, true); + } else { + master.regionManager.removeRegion(info); } } } } - - // There should not be any regions in transition for this server - the - // server should finish transitions itself before closing - Map inTransition = - master.regionManager.getRegionsInTransitionOnServer( - serverInfo.getServerName()); - for (Map.Entry entry : inTransition.entrySet()) { - LOG.warn("Region server " + serverInfo.getServerName() + - " shut down with region " + entry.getKey() + " in transition " + - "state " + entry.getValue()); - master.regionManager.setUnassigned(entry.getValue().getRegionInfo(), true); - } } - // We don't need to return anything to the server because it isn't - // going to do any more work. - } finally { - serversToServerInfo.notifyAll(); + // There should not be any regions in transition for this server - the + // server should finish transitions itself before closing + Map inTransition = master.regionManager + .getRegionsInTransitionOnServer(serverInfo.getServerName()); + for (Map.Entry entry : inTransition.entrySet()) { + LOG.warn("Region server " + serverInfo.getServerName() + + " shut down with region " + entry.getKey() + " in transition " + + "state " + entry.getValue()); + master.regionManager.setUnassigned(entry.getValue().getRegionInfo(), + true); + } } - } + } } /** @@ -824,44 +803,58 @@ /** Watcher triggered when a RS znode is deleted */ private class ServerExpirer implements Watcher { - private String server; - private HServerAddress serverAddress; + private HServerInfo server; - ServerExpirer(String server, HServerAddress serverAddress) { - this.server = server; - this.serverAddress = serverAddress; + ServerExpirer(final HServerInfo hsi) { + this.server = hsi; } public void process(WatchedEvent event) { - if (event.getType().equals(EventType.NodeDeleted)) { - LOG.info(server + " znode expired"); - // Remove the server from the known servers list and update load info - serverAddressToServerInfo.remove(serverAddress); - HServerInfo info = serversToServerInfo.remove(server); - if (info != null) { - String serverName = info.getServerName(); - HServerLoad load = serversToLoad.remove(serverName); - if (load != null) { - synchronized (loadToServers) { - Set servers = loadToServers.get(load); - if (servers != null) { - servers.remove(serverName); - if(servers.size() > 0) - loadToServers.put(load, servers); - else - loadToServers.remove(load); - } - } - } - deadServers.add(server); - RegionServerOperation op = new ProcessServerShutdown(master, info); - master.getRegionServerOperationQueue().put(op); + if (!event.getType().equals(EventType.NodeDeleted)) { + LOG.warn("Unexpected event=" + 
event); + return; + } + LOG.info(this.server.getServerName() + " znode expired"); + expireServer(this.server); + } + } + + /* + * Expire the passed server. Add it to list of deadservers and queue a + * shutdown processing. + */ + private synchronized void expireServer(final HServerInfo hsi) { + // First check a server to expire. ServerName is of the form: + // , , + String serverName = hsi.getServerName(); + HServerInfo info = this.serversToServerInfo.get(serverName); + if (info == null) { + LOG.warn("No HServerInfo for " + serverName); + return; + } + if (this.deadServers.contains(serverName)) { + LOG.warn("Already processing shutdown of " + serverName); + return; + } + // Remove the server from the known servers lists and update load info + this.serverAddressToServerInfo.remove(info.getServerAddress()); + this.serversToServerInfo.remove(serverName); + HServerLoad load = this.serversToLoad.remove(serverName); + if (load != null) { + synchronized (this.loadToServers) { + Set servers = this.loadToServers.get(load); + if (servers != null) { + servers.remove(serverName); + if (servers.size() <= 0) this.loadToServers.remove(load); } - synchronized (serversToServerInfo) { - serversToServerInfo.notifyAll(); - } } } + // Add to dead servers and queue a shutdown processing. + LOG.debug("Added=" + serverName + + " to dead servers, added shutdown processing operation"); + this.deadServers.add(serverName); + this.master.getRegionServerOperationQueue(). + put(new ProcessServerShutdown(master, info)); } /** @@ -875,10 +868,33 @@ * @param serverName * @return true if server is dead */ - public boolean isDead(String serverName) { - return deadServers.contains(serverName); + public boolean isDead(final String serverName) { + return isDead(serverName, false); } + /** + * @param serverName Servername as either host:port or + * host,port,startcode. + * @param hostAndPortOnly True if serverName is host and + * port only (host:port) and if so, then we do a prefix compare + * (ignoring start codes) looking for dead server. 
+ * @return true if server is dead + */ + boolean isDead(final String serverName, final boolean hostAndPortOnly) { + return isDead(this.deadServers, serverName, hostAndPortOnly); + } + + static boolean isDead(final Set deadServers, + final String serverName, final boolean hostAndPortOnly) { + if (!hostAndPortOnly) return deadServers.contains(serverName); + String serverNameColonReplaced = + serverName.replaceFirst(":", HServerInfo.SEPARATOR); + for (String hostPortStartCode: deadServers) { + if (hostPortStartCode.startsWith(serverNameColonReplaced)) return true; + } + return false; + } + public boolean canAssignUserRegions() { if (minimumServerCount == 0) { return true; @@ -889,4 +905,4 @@ public void setMinimumServerCount(int minimumServerCount) { this.minimumServerCount = minimumServerCount; } -} \ No newline at end of file +} Index: src/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/java/org/apache/hadoop/hbase/master/HMaster.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -498,7 +498,7 @@ HRegionInterface hri = this.connection.getHRegionConnection(address, false); HServerInfo info = hri.getHServerInfo(); - LOG.debug("Inspection found server " + info.getName()); + LOG.debug("Inspection found server " + info.getHostName()); serverManager.recordNewServer(info, true); HRegionInfo[] regions = hri.getRegionsAssignment(); for (HRegionInfo region : regions) { Index: src/java/org/apache/hadoop/hbase/master/RegionManager.java =================================================================== --- src/java/org/apache/hadoop/hbase/master/RegionManager.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/master/RegionManager.java (working copy) @@ -1434,7 +1434,7 @@ } // check if current server is overloaded - int numRegionsToClose = balanceFromOverloaded(servLoad, avg); + int numRegionsToClose = balanceFromOverloaded(info, servLoad, avg); // check if we can unload server by low loaded servers if(numRegionsToClose <= 0) { @@ -1456,13 +1456,14 @@ * Check if server load is not overloaded (with load > avgLoadPlusSlop). * @return number of regions to unassign. */ - private int balanceFromOverloaded(HServerLoad srvLoad, double avgLoad) { + private int balanceFromOverloaded(final HServerInfo info, + final HServerLoad srvLoad, final double avgLoad) { int avgLoadPlusSlop = (int)Math.ceil(avgLoad * (1 + this.slop)); int numSrvRegs = srvLoad.getNumberOfRegions(); if (numSrvRegs > avgLoadPlusSlop) { if (LOG.isDebugEnabled()) { - LOG.debug("Server is overloaded: load=" + numSrvRegs + - ", avg=" + avgLoad + ", slop=" + this.slop); + LOG.debug("Server " + info.getServerName() + " is overloaded: load=" + + numSrvRegs + ", avg=" + avgLoad + ", slop=" + this.slop); } return numSrvRegs - (int)Math.ceil(avgLoad); } Index: src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java =================================================================== --- src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java (working copy) @@ -34,12 +34,17 @@ private long expire; protected final HMaster master; protected final int numRetries; + /* How long we stay on queue. 
+ */ private int expirationDuration; protected RegionServerOperation(HMaster master) { this.master = master; this.numRetries = master.numRetries; - this.expirationDuration = this.master.leaseTimeout/2; + // this.master.leaseTimeout is 120 by default. 120/2, which is what it + // used to be, is a long time to wait on something coming around again. + // Set a max of 10 seconds to wait. + this.expirationDuration = Math.min(this.master.leaseTimeout/10, 10); resetExpiration(); } Index: src/java/org/apache/hadoop/hbase/HServerAddress.java =================================================================== --- src/java/org/apache/hadoop/hbase/HServerAddress.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/HServerAddress.java (working copy) @@ -27,14 +27,12 @@ import java.net.InetSocketAddress; /** - * HServerAddress is a "label" for a HBase server that combines the host - * name and port number. + * HServerAddress is a "label" for a HBase server made of host and port number. */ public class HServerAddress implements WritableComparable { private InetSocketAddress address; String stringValue; - /** Empty constructor, used for Writable */ public HServerAddress() { this.address = null; this.stringValue = null; @@ -51,9 +49,7 @@ } /** - * Construct a HServerAddress from a string of the form hostname:port - * - * @param hostAndPort format 'hostname:port' + * @param hostAndPort Hostname and port formatted as <hostname> ':' <port> */ public HServerAddress(String hostAndPort) { int colonIndex = hostAndPort.lastIndexOf(':'); @@ -68,9 +64,8 @@ } /** - * Construct a HServerAddress from hostname, port number - * @param bindAddress host name - * @param port port number + * @param bindAddress Hostname + * @param port Port number */ public HServerAddress(String bindAddress, int port) { this.address = new InetSocketAddress(bindAddress, port); @@ -78,48 +73,45 @@ } /** - * Construct a HServerAddress from another HServerAddress + * Copy-constructor * - * @param other the HServerAddress to copy from + * @param other HServerAddress to copy from */ public HServerAddress(HServerAddress other) { String bindAddress = other.getBindAddress(); int port = other.getPort(); - address = new InetSocketAddress(bindAddress, port); + this.address = new InetSocketAddress(bindAddress, port); stringValue = bindAddress + ":" + port; } - /** @return bind address */ + /** @return Bind address */ public String getBindAddress() { - return address.getAddress().getHostAddress(); + return this.address.getAddress().getHostAddress(); } - /** @return port number */ + /** @return Port number */ public int getPort() { - return address.getPort(); + return this.address.getPort(); } - /** @return host name */ + /** @return Hostname */ public String getHostname() { - return address.getHostName(); + return this.address.getHostName(); } - /** @return the InetSocketAddress */ + /** @return The InetSocketAddress */ public InetSocketAddress getInetSocketAddress() { - return address; + return this.address; } /** - * @see java.lang.Object#toString() + * @return String formatted as <bind address> ':' <port> */ @Override public String toString() { - return (stringValue == null ? "" : stringValue); + return (this.stringValue == null ? 
"" : this.stringValue); } - /** - * @see java.lang.Object#equals(java.lang.Object) - */ @Override public boolean equals(Object o) { if (this == o) { @@ -134,9 +126,6 @@ return this.compareTo((HServerAddress)o) == 0; } - /** - * @see java.lang.Object#hashCode() - */ @Override public int hashCode() { int result = this.address.hashCode(); Index: src/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java =================================================================== --- src/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java (revision 942003) +++ src/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java (working copy) @@ -112,15 +112,6 @@ public static void shutdown(final HMaster master, final List regionservers) { LOG.debug("Shutting down HBase Cluster"); - // Be careful how the hdfs shutdown thread runs in context where more than - // one regionserver in the mix. - Thread hdfsClientFinalizer = null; - for (JVMClusterUtil.RegionServerThread t: regionservers) { - Thread tt = t.getRegionServer().setHDFSShutdownThreadOnExit(null); - if (hdfsClientFinalizer == null && tt != null) { - hdfsClientFinalizer = tt; - } - } if (master != null) { master.shutdown(); } @@ -147,14 +138,6 @@ } } } - if (hdfsClientFinalizer != null) { - // Don't run the shutdown thread. Plays havoc if we try to start a - // minihbasecluster immediately after this one has gone down (In - // Filesystem, the shutdown thread is kept in a static and is created - // on classloading. Can only run it once). - // hdfsClientFinalizer.start(); - // Threads.shutdown(hdfsClientFinalizer); - } LOG.info("Shutdown " + ((regionservers != null)? master.getName(): "0 masters") + " " + regionservers.size() + " region server(s)");