Index: src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 952866)
+++ src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy)
@@ -811,4 +811,20 @@
       Thread.sleep(500);
     }
   }
+
+  /**
+   * Make sure that at least the specified number of region servers
+   * are running, starting one new server for each one that is missing.
+   * @param num minimum number of region servers that should be running
+   * @throws IOException propagated from the mini cluster calls
+   */
+  public void ensureSomeRegionServersAvailable(final int num)
+      throws IOException {
+    // Start one server per missing slot; a single check would add at most one.
+    for (int live = this.getHBaseCluster().getLiveRegionServerThreads().size();
+        live < num; live++) {
+      LOG.info("Started new server=" +
+        this.getHBaseCluster().startRegionServer());
+    }
+  }
 }
Index: src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java (revision 952866)
+++ src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java (working copy)
@@ -78,6 +78,9 @@
     private final Map<HServerInfo, List<HMsg>> messages =
       new ConcurrentHashMap<HServerInfo, List<HMsg>>();
 
+    private final Map<HServerInfo, IOException> exceptions =
+      new ConcurrentHashMap<HServerInfo, IOException>();
+
     public MiniHBaseClusterMaster(final Configuration conf)
     throws IOException {
       super(conf);
@@ -99,9 +102,26 @@
       }
     }
 
+    void addException(final HServerInfo hsi, final IOException ex) {
+      this.exceptions.put(hsi, ex);
+    }
+
+    /**
+     * This implementation is special, exceptions will be treated first and
+     * messages won't be sent back to the region servers even if some are
+     * specified.
+     * @param hsi the rs
+     * @param msgs Messages to add to
+     * @return Messages to return to the region server
+     * @throws IOException thrown if one was added for this region server
+     */
     @Override
     protected HMsg[] adornRegionServerAnswer(final HServerInfo hsi,
-        final HMsg[] msgs) {
+        final HMsg[] msgs) throws IOException {
+      IOException ex = this.exceptions.remove(hsi);
+      if (ex != null) {
+        throw ex;
+      }
       HMsg [] answerMsgs = msgs;
       synchronized (this.messages) {
         List<HMsg> hmsgs = this.messages.get(hsi);
@@ -385,6 +405,31 @@
   }
 
   /**
+   * Add an exception to send when a region server checks back in
+   * @param serverNumber Which server to send it to
+   * @param ex The exception that will be sent
+   * @throws IOException
+   */
+  public void addExceptionToSendRegionServer(final int serverNumber,
+      IOException ex) throws IOException {
+    MiniHBaseClusterRegionServer hrs =
+      (MiniHBaseClusterRegionServer)getRegionServer(serverNumber);
+    addExceptionToSendRegionServer(hrs, ex);
+  }
+
+  /**
+   * Add an exception to send when a region server checks back in
+   * @param hrs Which server to send it to
+   * @param ex The exception that will be sent
+   * @throws IOException
+   */
+  public void addExceptionToSendRegionServer(
+      final MiniHBaseClusterRegionServer hrs, IOException ex)
+      throws IOException {
+    ((MiniHBaseClusterMaster)getMaster()).addException(hrs.getHServerInfo(),ex);
+  }
+
+  /**
    * Add a message to include in the responses send a regionserver when it
    * checks back in.
    * @param serverNumber Which server to send it to.
Index: src/test/java/org/apache/hadoop/hbase/master/TestKillingServersFromMaster.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestKillingServersFromMaster.java (revision 952866) +++ src/test/java/org/apache/hadoop/hbase/master/TestKillingServersFromMaster.java (working copy) @@ -29,17 +29,21 @@ import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; +import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -public class TestMasterWrongRS { +public class TestKillingServersFromMaster { private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static MiniHBaseCluster cluster; @BeforeClass public static void beforeAllTests() throws Exception { - TEST_UTIL.startMiniCluster(3); + TEST_UTIL.startMiniCluster(2); + cluster = TEST_UTIL.getHBaseCluster(); } @AfterClass @@ -47,26 +51,53 @@ TEST_UTIL.shutdownMiniCluster(); } + @Before + public void setup() throws IOException { + TEST_UTIL.ensureSomeRegionServersAvailable(2); + } + /** - * Test when region servers start reporting with the wrong address - * or start code. Currently the decision is to shut them down. 
+ * Test that a region server that reports with the wrong start code + * gets shut down * See HBASE-2613 * @throws Exception */ @Test (timeout=180000) - public void testRsReportsWrongServerName() throws Exception { - MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + public void testRsReportsWrongStartCode() throws Exception { MiniHBaseClusterRegionServer firstServer = (MiniHBaseClusterRegionServer)cluster.getRegionServer(0); - HRegionServer secondServer = cluster.getRegionServer(1); HServerInfo hsi = firstServer.getServerInfo(); + // This constructor creates a new startcode firstServer.setHServerInfo(new HServerInfo(hsi.getServerAddress(), hsi.getInfoPort(), hsi.getHostname())); + cluster.waitOnRegionServer(0); + assertEquals(1, cluster.getLiveRegionServerThreads().size()); + } + /** + * Test that a region server that reports with the wrong address + * gets shut down + * See HBASE-2613 + * @throws Exception + */ + @Test (timeout=180000) + public void testRsReportsWrongAddress() throws Exception { + MiniHBaseClusterRegionServer firstServer = + (MiniHBaseClusterRegionServer)cluster.getRegionServer(0); + firstServer.getHServerInfo().setServerAddress( + new HServerAddress("0.0.0.0", 60010)); cluster.waitOnRegionServer(0); - assertEquals(2, cluster.getLiveRegionServerThreads().size()); + assertEquals(1, cluster.getLiveRegionServerThreads().size()); + } - secondServer.getHServerInfo().setServerAddress(new HServerAddress("0.0.0.0", 60010)); + /** + * Send a YouAreDeadException to the region server and expect it to shut down + * See HBASE-2691 + * @throws Exception + */ + @Test (timeout=180000) + public void testSendYouAreDead() throws Exception { + cluster.addExceptionToSendRegionServer(0, new YouAreDeadException("bam!")); cluster.waitOnRegionServer(0); assertEquals(1, cluster.getLiveRegionServerThreads().size()); } Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java =================================================================== --- 
src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java (revision 952866) +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java (working copy) @@ -93,12 +93,7 @@ } @Before public void setup() throws IOException { - if (TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size() < 2) { - // Need at least two servers. - LOG.info("Started new server=" + - TEST_UTIL.getHBaseCluster().startRegionServer()); - - } + TEST_UTIL.ensureSomeRegionServersAvailable(2); } /** Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterWrongRS.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestMasterWrongRS.java (revision 952866) +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterWrongRS.java (working copy) @@ -1,73 +0,0 @@ -/** - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase.master; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; - -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; -import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; -import org.apache.hadoop.hbase.regionserver.HRegionServer; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestMasterWrongRS { - private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); - - @BeforeClass - public static void beforeAllTests() throws Exception { - TEST_UTIL.startMiniCluster(3); - } - - @AfterClass - public static void afterAllTests() throws IOException { - TEST_UTIL.shutdownMiniCluster(); - } - - /** - * Test when region servers start reporting with the wrong address - * or start code. Currently the decision is to shut them down. 
- * See HBASE-2613 - * @throws Exception - */ - @Test (timeout=180000) - public void testRsReportsWrongServerName() throws Exception { - MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); - MiniHBaseClusterRegionServer firstServer = - (MiniHBaseClusterRegionServer)cluster.getRegionServer(0); - HRegionServer secondServer = cluster.getRegionServer(1); - HServerInfo hsi = firstServer.getServerInfo(); - firstServer.setHServerInfo(new HServerInfo(hsi.getServerAddress(), - hsi.getInfoPort(), hsi.getHostname())); - - cluster.waitOnRegionServer(0); - assertEquals(2, cluster.getLiveRegionServerThreads().size()); - - secondServer.getHServerInfo().setServerAddress(new HServerAddress("0.0.0.0", 60010)); - cluster.waitOnRegionServer(0); - assertEquals(1, cluster.getLiveRegionServerThreads().size()); - } -} Index: src/main/java/org/apache/hadoop/hbase/PleaseHoldException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/PleaseHoldException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/PleaseHoldException.java (revision 0) @@ -0,0 +1,34 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase; + +import java.io.IOException; + +/** + * This exception is thrown by the master when a region server was shut down + * and restarted so fast that the master still hasn't processed the server + * shutdown of the first instance. + */ +public class PleaseHoldException extends IOException { + + public PleaseHoldException(String message) { + super(message); + } +} Index: src/main/java/org/apache/hadoop/hbase/YouAreDeadException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/YouAreDeadException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/YouAreDeadException.java (revision 0) @@ -0,0 +1,34 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; + +/** + * This exception is thrown by the master when a region server reports and is + * already being processed as dead. This can happen when a region server loses + * its session but hasn't figured it out yet. 
+ */ +public class YouAreDeadException extends IOException { + + public YouAreDeadException(String message) { + super(message); + } +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 952866) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -41,9 +41,11 @@ import org.apache.hadoop.hbase.Leases.LeaseStillHeldException; import org.apache.hadoop.hbase.LocalHBaseCluster; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.PleaseHoldException; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.UnknownRowLockException; import org.apache.hadoop.hbase.UnknownScannerException; +import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.MultiPut; @@ -524,9 +526,15 @@ continue; } } catch (Exception e) { // FindBugs REC_CATCH_EXCEPTION + // Two special exceptions could be printed out here, + // PleaseHoldException and YouAreDeadException if (e instanceof IOException) { e = RemoteExceptionHandler.checkIOException((IOException) e); } + if (e instanceof YouAreDeadException) { + // This will be caught and handled as a fatal error below + throw e; + } tries++; if (tries > 0 && (tries % this.numRetries) == 0) { // Check filesystem every so often. 
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 952866)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy)
@@ -30,8 +30,8 @@
 import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HServerLoad;
-import org.apache.hadoop.hbase.Leases;
-import org.apache.hadoop.hbase.Leases.LeaseStillHeldException;
+import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
@@ -177,13 +177,14 @@
     String hostAndPort = info.getServerAddress().toString();
     HServerInfo existingServer = haveServerWithSameHostAndPortAlready(info.getHostnamePort());
     if (existingServer != null) {
-      LOG.info("Server start rejected; we already have " + hostAndPort +
-        " registered; existingServer=" + existingServer + ", newServer=" + info);
+      String message = "Server start rejected; we already have " + hostAndPort +
+        " registered; existingServer=" + existingServer + ", newServer=" + info;
+      LOG.info(message);
       if (existingServer.getStartCode() < info.getStartCode()) {
         LOG.info("Triggering server recovery; existingServer looks stale");
         expireServer(existingServer);
       }
-      throw new Leases.LeaseStillHeldException(hostAndPort);
+      throw new PleaseHoldException(message);
     }
     checkIsDead(info.getServerName(), "STARTUP");
     LOG.info("Received start message from: " + info.getServerName());
@@ -208,11 +209,12 @@
-   * @throws LeaseStillHeldException
+   * @throws YouAreDeadException if the server is currently being processed as dead
    */
   private void checkIsDead(final String serverName, final String what)
-  throws LeaseStillHeldException {
+  throws YouAreDeadException {
     if (!isDead(serverName)) return;
-    LOG.debug("Server " + what + " rejected; currently processing " +
-      serverName + " as dead server");
-    throw new Leases.LeaseStillHeldException(serverName);
+    String message = "Server " + what + " rejected; currently processing " +
+      serverName + " as dead server";
+    LOG.debug(message);
+    throw new YouAreDeadException(message);
   }
 
   /**
Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 952866)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -679,11 +679,13 @@
 
   /**
    * Override if you'd add messages to return to regionserver hsi
-   * @param messages Messages to add to
+   * or to send an exception.
+   * @param msgs Messages to add to
    * @return Messages to return to
+   * @throws IOException exceptions that were injected for the region servers
    */
   protected HMsg [] adornRegionServerAnswer(final HServerInfo hsi,
-    final HMsg [] msgs) {
+    final HMsg [] msgs) throws IOException {
     return msgs;
   }
 