diff --git src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 8ee6782..6243437 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1552,10 +1552,8 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, public void stop(final String msg) { this.stopped = true; LOG.info("STOPPED: " + msg); - synchronized (this) { - // Wakes run() if it is sleeping - notifyAll(); // FindBugs NN_NAKED_NOTIFY - } + // Wakes run() if it is sleeping + sleeper.skipSleepCycle(); } @Override diff --git src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java index 120f382..13edb36 100644 --- src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java +++ src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionServerCoprocessorExceptionWithAbort.java @@ -29,6 +29,9 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -44,6 +47,37 @@ import static org.junit.Assert.*; */ @Category(MediumTests.class) public class TestRegionServerCoprocessorExceptionWithAbort { + + private class zkwAbortable implements Abortable { + @Override + public void abort(String why, Throwable e) { + throw new RuntimeException("Fatal ZK rs tracker error, why=", e); + } + @Override + public boolean isAborted() { + return false; + } + }; + + private class RSTracker extends ZooKeeperNodeTracker { + public boolean regionZKNodeWasDeleted = false; + public String rsNode; + private Thread mainThread; + + public RSTracker(ZooKeeperWatcher zkw, String rsNode, Thread mainThread) { + super(zkw, rsNode, new zkwAbortable()); + this.rsNode = rsNode; + this.mainThread = mainThread; + } + + @Override + public synchronized void nodeDeleted(String path) { + if (path.equals(rsNode)) { + regionZKNodeWasDeleted = true; + mainThread.interrupt(); + } + } + } private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); @BeforeClass @@ -75,32 +109,45 @@ public class TestRegionServerCoprocessorExceptionWithAbort { TEST_UTIL.createMultiRegions(table, TEST_FAMILY)); // Note which regionServer will abort (after put is attempted). - HRegionServer regionServer = + final HRegionServer regionServer = TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE); + + // add watch so we can know when this regionserver aborted. + ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), + "unittest", new zkwAbortable()); + + RSTracker rsTracker = new RSTracker(zkw, + "/hbase/rs/"+regionServer.getServerName(), Thread.currentThread()); + rsTracker.start(); + zkw.registerListener(rsTracker); + + boolean caughtInterruption = false; try { final byte[] ROW = Bytes.toBytes("aaa"); Put put = new Put(ROW); put.add(TEST_FAMILY, ROW, ROW); table.put(put); } catch (IOException e) { - fail("put() failed: " + e); - } - // Wait up to 30 seconds for regionserver to abort. - boolean regionServerAborted = false; - for (int i = 0; i < 30; i++) { - if (regionServer.isAborted()) { - regionServerAborted = true; - break; + // Depending on exact timing of the threads involved, zkw's interruption + // might be caught here ... + if (e.getCause().getClass().equals(InterruptedException.class)) { + caughtInterruption = true; + } else { + fail("put() failed: " + e); } + } + if (caughtInterruption == false) { try { - Thread.sleep(1000); + Thread.sleep(30000); + fail("RegionServer did not abort within 30 seconds."); } catch (InterruptedException e) { - fail("InterruptedException while waiting for regionserver " + - "zk node to be deleted."); + // .. or it might be caught here. + caughtInterruption = true; } } + assertTrue("Main thread caught interruption.",caughtInterruption); assertTrue("RegionServer aborted on coprocessor exception, as expected.", - regionServerAborted); + rsTracker.regionZKNodeWasDeleted); } public static class BuggyRegionObserver extends SimpleRegionObserver {