Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java (revision 1180297) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java (working copy) @@ -94,9 +94,11 @@ // Start transaction. SplitTransaction st = prepareGOOD_SPLIT_ROW(); SplitTransaction spiedUponSt = spy(st); - Mockito.doThrow(new MockedFailedDaughterOpen()). - when(spiedUponSt).openDaughterRegion((Server)Mockito.anyObject(), - (RegionServerServices)Mockito.anyObject(), (HRegion)Mockito.anyObject()); + Mockito + .doThrow(new MockedFailedDaughterOpen()) + .when(spiedUponSt) + .openDaughterRegion((Server) Mockito.anyObject(), + (HRegion) Mockito.anyObject()); // Run the execute. Look at what it returns. boolean expectedException = false; Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java (revision 0) @@ -0,0 +1,107 @@ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HConnectionManager; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.RegionOfflineException; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.PairOfSameType; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +public class TestEndToEndSplitTransaction { + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @BeforeClass + public static void beforeAllTests() throws Exception { + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5); + TEST_UTIL.startMiniCluster(1); + } + + @AfterClass + public static void afterAllTests() throws IOException { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testMasterOpsWhileSplitting() throws Exception { + byte[] tableName = Bytes.toBytes("TestSplit"); + byte[] familyName = Bytes.toBytes("fam"); + TEST_UTIL.createTable(tableName, familyName); + TEST_UTIL.loadTable(new HTable(TEST_UTIL.getConfiguration(), tableName), + familyName); + HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0); + byte []firstRow = Bytes.toBytes("aaa"); + byte []splitRow = Bytes.toBytes("lll"); + byte []lastRow = Bytes.toBytes("zzz"); + HConnection con = HConnectionManager + .getConnection(TEST_UTIL.getConfiguration()); + // this will also cache the region + byte[] regionName = con.locateRegion(tableName, splitRow).getRegionInfo() + .getRegionName(); + HRegion region = server.getRegion(regionName); + SplitTransaction split = new SplitTransaction(region, splitRow); + split.prepare(); + + // 1. phase I + PairOfSameType regions = split.createDaughters(server, server); + assertFalse(test(con, tableName, firstRow, server)); + assertFalse(test(con, tableName, lastRow, server)); + + // passing null as services prevents final step + // 2, most of phase II + split.openDaughters(server, null, regions.getFirst(), regions.getSecond()); + assertFalse(test(con, tableName, firstRow, server)); + assertFalse(test(con, tableName, lastRow, server)); + + // 3. finish phase II + // note that this replicates some code from SplitTransaction + // 2nd daughter first + server.postOpenDeployTasks(regions.getSecond(), server.getCatalogTracker(), true); + // THIS is the crucial point: + // the 2nd daughter was added, so querying before the split key should fail. + assertFalse(test(con, tableName, firstRow, server)); + // past splitkey is ok. + assertTrue(test(con, tableName, lastRow, server)); + + // first daughter second + server.postOpenDeployTasks(regions.getFirst(), server.getCatalogTracker(), true); + assertTrue(test(con, tableName, firstRow, server)); + assertTrue(test(con, tableName, lastRow, server)); + + // 4. phase III + split.transitionZKNode(server, regions.getFirst(), regions.getSecond()); + assertTrue(test(con, tableName, firstRow, server)); + assertTrue(test(con, tableName, lastRow, server)); + } + + /** + * attempt to locate the region and perform a get and scan + * @return True if successful, False otherwise. + */ + private boolean test(HConnection con, byte[] tableName, byte[] row, + HRegionServer server) { + // not using HTable to avoid timeouts and retries + try { + byte[] regionName = con.relocateRegion(tableName, row).getRegionInfo() + .getRegionName(); + // get and scan should now succeed without exception + server.get(regionName, new Get(row)); + server.openScanner(regionName, new Scan(row)); + } catch (IOException x) { + return false; + } + return true; + } +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (revision 1180297) +++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (working copy) @@ -203,19 +203,15 @@ } /** - * Run the transaction. + * Prepare the regions and region files. * @param server Hosting server instance. Can be null when testing (won't try * and update in zk if a null server) * @param services Used to online/offline regions. * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server, RegionServerServices)} * @return Regions created - * @throws KeeperException - * @throws NodeExistsException - * @see #rollback(Server, RegionServerServices) */ - public PairOfSameType execute(final Server server, - final RegionServerServices services) - throws IOException { + /* package */PairOfSameType createDaughters(final Server server, + final RegionServerServices services) throws IOException { LOG.info("Starting split of region " + this.parent); if ((server != null && server.isStopped()) || (services != null && services.isStopping())) { @@ -298,7 +294,21 @@ MetaEditor.offlineParentInMeta(server.getCatalogTracker(), this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo()); } + return new PairOfSameType(a, b); + } + /** + * Perform time consuming opening of the daughter regions. + * @param server Hosting server instance. Can be null when testing (won't try + * and update in zk if a null server) + * @param services Used to online/offline regions. + * @param a first daughter region + * @param a second daughter region + * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server, RegionServerServices)} + */ + /* package */void openDaughters(final Server server, + final RegionServerServices services, HRegion a, HRegion b) + throws IOException { // This is the point of no return. Adding subsequent edits to .META. as we // do below when we do the daughter opens adding each to .META. can fail in // various interesting ways the most interesting of which is a timeout @@ -311,27 +321,64 @@ // still and the server shutdown fixup of .META. will point to these // regions. this.journal.add(JournalEntry.PONR); + boolean stopped = server != null && server.isStopped(); + boolean stopping = services != null && services.isStopping(); + // TODO: Is this check needed here? + if (stopped || stopping) { + // add 2nd daughter first (see HBASE-4335) + MetaEditor.addDaughter(server.getCatalogTracker(), + b.getRegionInfo(), null); + MetaEditor.addDaughter(server.getCatalogTracker(), + a.getRegionInfo(), null); + LOG.info("Not opening daughters " + + b.getRegionInfo().getRegionNameAsString() + + " and " + + a.getRegionInfo().getRegionNameAsString() + + " because stopping=" + stopping + ", stopped=" + stopped); + } else { // Open daughters in parallel. - DaughterOpener aOpener = new DaughterOpener(server, services, a); - DaughterOpener bOpener = new DaughterOpener(server, services, b); - aOpener.start(); - bOpener.start(); - try { - aOpener.join(); - bOpener.join(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("Interrupted " + e.getMessage()); + DaughterOpener aOpener = new DaughterOpener(server, a); + DaughterOpener bOpener = new DaughterOpener(server, b); + aOpener.start(); + bOpener.start(); + try { + aOpener.join(); + bOpener.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted " + e.getMessage()); + } + if (aOpener.getException() != null) { + throw new IOException("Failed " + + aOpener.getName(), aOpener.getException()); + } + if (bOpener.getException() != null) { + throw new IOException("Failed " + + bOpener.getName(), bOpener.getException()); + } + if (services != null) { + try { + // add 2nd daughter first (see HBASE-4335) + services.postOpenDeployTasks(b, server.getCatalogTracker(), true); + services.postOpenDeployTasks(a, server.getCatalogTracker(), true); + } catch (KeeperException ke) { + throw new IOException(ke); + } + } } - if (aOpener.getException() != null) { - throw new IOException("Failed " + - aOpener.getName(), aOpener.getException()); - } - if (bOpener.getException() != null) { - throw new IOException("Failed " + - bOpener.getName(), bOpener.getException()); - } + } + /** + * Finish off split transaction, transition the zknode + * @param server Hosting server instance. Can be null when testing (won't try + * and update in zk if a null server) + * @param services Used to online/offline regions. + * @param a first daughter region + * @param a second daughter region + * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server, RegionServerServices)} + */ + /* package */void transitionZKNode(final Server server, HRegion a, HRegion b) + throws IOException { // Tell master about split by updating zk. If we fail, abort. if (server != null && server.getZooKeeper() != null) { try { @@ -371,25 +418,41 @@ // Leaving here, the splitdir with its dross will be in place but since the // split was successful, just leave it; it'll be cleaned when parent is // deleted and cleaned up. - return new PairOfSameType(a, b); } + /** + * Run the transaction. + * @param server Hosting server instance. Can be null when testing (won't try + * and update in zk if a null server) + * @param services Used to online/offline regions. + * @throws IOException If thrown, transaction failed. Call {@link #rollback(Server, RegionServerServices)} + * @return Regions created + * @throws KeeperException + * @throws NodeExistsException + * @see #rollback(Server, RegionServerServices) + */ + public PairOfSameType execute(final Server server, + final RegionServerServices services) + throws IOException { + PairOfSameType regions = createDaughters(server, services); + openDaughters(server, services, regions.getFirst(), regions.getSecond()); + transitionZKNode(server, regions.getFirst(), regions.getSecond()); + return regions; + } + /* * Open daughter region in its own thread. * If we fail, abort this hosting server. */ class DaughterOpener extends Thread { - private final RegionServerServices services; private final Server server; private final HRegion r; private Throwable t = null; - DaughterOpener(final Server s, final RegionServerServices services, - final HRegion r) { + DaughterOpener(final Server s, final HRegion r) { super((s == null? "null-services": s.getServerName()) + "-daughterOpener=" + r.getRegionInfo().getEncodedName()); setDaemon(true); - this.services = services; this.server = s; this.r = r; } @@ -405,7 +468,7 @@ @Override public void run() { try { - openDaughterRegion(this.server, this.services, r); + openDaughterRegion(this.server, r); } catch (Throwable t) { this.t = t; } @@ -420,26 +483,12 @@ * @throws IOException * @throws KeeperException */ - void openDaughterRegion(final Server server, - final RegionServerServices services, final HRegion daughter) + void openDaughterRegion(final Server server, final HRegion daughter) throws IOException, KeeperException { - boolean stopped = server != null && server.isStopped(); - boolean stopping = services != null && services.isStopping(); - if (stopped || stopping) { - MetaEditor.addDaughter(server.getCatalogTracker(), - daughter.getRegionInfo(), null); - LOG.info("Not opening daughter " + - daughter.getRegionInfo().getRegionNameAsString() + - " because stopping=" + stopping + ", stopped=" + server.isStopped()); - return; - } HRegionInfo hri = daughter.getRegionInfo(); LoggingProgressable reporter = server == null? null: new LoggingProgressable(hri, server.getConfiguration()); - HRegion r = daughter.openHRegion(reporter); - if (services != null) { - services.postOpenDeployTasks(r, server.getCatalogTracker(), true); - } + daughter.openHRegion(reporter); } static class LoggingProgressable implements CancelableProgressable {