Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (revision 1407728)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (working copy)
@@ -236,20 +236,27 @@
       server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
         this.fileSplitTimeout);
 
-    this.journal.add(JournalEntry.STARTED_SPLITTING);
     // Set ephemeral SPLITTING znode up in zk.  Mocked servers sometimes don't
     // have zookeeper so don't do zk stuff if server or zookeeper is null
     if (server != null && server.getZooKeeper() != null) {
       try {
-        this.znodeVersion = createNodeSplitting(server.getZooKeeper(),
+        createNodeSplitting(server.getZooKeeper(),
           this.parent.getRegionInfo(), server.getServerName());
       } catch (KeeperException e) {
-        throw new IOException("Failed setting SPLITTING znode on " +
+        throw new IOException("Failed creating SPLITTING znode on " +
           this.parent.getRegionNameAsString(), e);
       }
     }
     this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
-
+    if (server != null && server.getZooKeeper() != null) {
+      try {
+        this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
+            this.parent.getRegionInfo(), server.getServerName(), -1);
+      } catch (KeeperException e) {
+        throw new IOException("Failed setting SPLITTING znode on " +
+          this.parent.getRegionNameAsString(), e);
+      }
+    }
     createSplitDir(this.parent.getFilesystem(), this.splitdir);
     this.journal.add(JournalEntry.CREATE_SPLIT_DIR);
 
@@ -740,15 +747,9 @@
       JournalEntry je = iterator.previous();
       switch(je) {
 
-      case STARTED_SPLITTING:
-        if (server != null && server.getZooKeeper() != null) {
-          cleanZK(server, this.parent.getRegionInfo(), false);
-        }
-        break;
-
       case SET_SPLITTING_IN_ZK:
         if (server != null && server.getZooKeeper() != null) {
-          cleanZK(server, this.parent.getRegionInfo(), true);
+          cleanZK(server, this.parent.getRegionInfo());
         }
         break;
 
@@ -841,15 +842,11 @@
     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
   }
 
-  private static void cleanZK(final Server server, final HRegionInfo hri, boolean abort) {
+  private static void cleanZK(final Server server, final HRegionInfo hri) {
     try {
       // Only delete if its in expected state; could have been hijacked.
       ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
         EventType.RS_ZK_REGION_SPLITTING);
-    } catch (KeeperException.NoNodeException nn) {
-      if (abort) {
-        server.abort("Failed cleanup of " + hri.getRegionNameAsString(), nn);
-      }
     } catch (KeeperException e) {
       server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
     }
@@ -869,7 +866,7 @@
    * @throws KeeperException
    * @throws IOException
    */
-  int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
+  void createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
       final ServerName serverName) throws KeeperException, IOException {
     LOG.debug(zkw.prefix("Creating ephemeral node for " +
       region.getEncodedName() + " in SPLITTING state"));
@@ -881,9 +878,6 @@
     if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, data.getBytes())) {
       throw new IOException("Failed create of ephemeral " + node);
     }
-    // Transition node from SPLITTING to SPLITTING and pick up version so we
-    // can be sure this znode is ours; version is needed deleting.
-    return transitionNodeSplitting(zkw, region, serverName, -1);
   }
 
   /**
Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (revision 1407728)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (working copy)
@@ -27,6 +27,8 @@
 
 import java.io.IOException;
 import java.util.List;
+import java.util.NavigableMap;
+import java.util.concurrent.CountDownLatch;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -37,6 +39,7 @@
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
 import org.apache.hadoop.hbase.master.AssignmentManager;
@@ -70,7 +73,11 @@
   private HBaseAdmin admin = null;
   private MiniHBaseCluster cluster = null;
   private static final int NB_SERVERS = 2;
-
+  private static CountDownLatch latch = new CountDownLatch(1);
+  private static boolean secondSplit = false;
+  private static boolean callRollBack = false;
+  private static boolean firstSplitCompleted = false;
+
   private static final HBaseTestingUtility TESTING_UTIL =
     new HBaseTestingUtility();
 
@@ -564,7 +571,7 @@
     } else {
       st = new MockedSplitTransaction(regions.get(0), null) {
         @Override
-        int createNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)
+        void createNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)
            throws KeeperException, IOException {
          throw new IOException();
        }
@@ -643,6 +650,100 @@
     }
   }
 
+  @Test(timeout = 2000000)
+  public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
+    final byte[] tableName = Bytes
+        .toBytes("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
+    HBaseAdmin admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
+    try {
+      // Create table then get the single region for our new table.
+      HTableDescriptor htd = new HTableDescriptor(tableName);
+      htd.addFamily(new HColumnDescriptor("cf"));
+      admin.createTable(htd);
+      HTable t = new HTable(cluster.getConfiguration(), tableName);
+      while (!(cluster.getRegions(tableName).size() == 1)) {
+        Thread.sleep(100);
+      }
+      final List<HRegion> regions = cluster.getRegions(tableName);
+      HRegionInfo hri = getAndCheckSingleTableRegion(regions);
+      int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
+      final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
+      insertData(tableName, admin, t);
+      // Turn off balancer so it doesn't cut in and mess up our placements.
+      this.admin.setBalancerRunning(false, false);
+      // Turn off the meta scanner so it doesn't remove the parent on us.
+      cluster.getMaster().setCatalogJanitorEnabled(false);
+
+      new Thread() {
+        public void run() {
+          SplitTransaction st = null;
+          st = new MockedSplitTransaction(regions.get(0), Bytes.toBytes("row2"));
+          try {
+            st.prepare();
+            st.execute(regionServer, regionServer);
+          } catch (IOException e) {
+
+          }
+        }
+      }.start();
+      while (!callRollBack) {
+        Thread.sleep(100);
+      }
+      SplitTransaction st = null;
+      st = new MockedSplitTransaction(regions.get(0), Bytes.toBytes("row2"));
+      try {
+        secondSplit = true;
+        st.prepare();
+        st.execute(regionServer, regionServer);
+      } catch (IOException e) {
+        LOG.debug("Rollback started: " + e.getMessage());
+        st.rollback(regionServer, regionServer);
+      }
+      while (!firstSplitCompleted) {
+        Thread.sleep(100);
+      }
+      NavigableMap<String, AssignmentManager.RegionState> rit = cluster.getMaster().getAssignmentManager()
+          .getRegionsInTransition();
+      while (rit.containsKey(hri.getTableNameAsString())) {
+        Thread.sleep(100);
+      }
+      List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
+      // Region server side split is successful.
+      assertEquals("The parent region should be splitted", 2, onlineRegions.size());
+      // Should no longer be present in RIT.
+      List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionsOfTable(tableName);
+      // Master side should also reflect the same
+      assertEquals("No of regions in master", 2, regionsOfTable.size());
+    } finally {
+      admin.setBalancerRunning(true, false);
+      secondSplit = false;
+      firstSplitCompleted = false;
+      callRollBack = false;
+      cluster.getMaster().setCatalogJanitorEnabled(true);
+      if (admin.isTableAvailable(tableName) && admin.isTableEnabled(tableName)) {
+        admin.disableTable(tableName);
+        admin.deleteTable(tableName);
+        admin.close();
+      }
+    }
+  }
+
+  private void insertData(final byte[] tableName, HBaseAdmin admin, HTable t) throws IOException,
+      InterruptedException {
+    Put p = new Put(Bytes.toBytes("row1"));
+    p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
+    t.put(p);
+    p = new Put(Bytes.toBytes("row2"));
+    p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
+    t.put(p);
+    p = new Put(Bytes.toBytes("row3"));
+    p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
+    t.put(p);
+    p = new Put(Bytes.toBytes("row4"));
+    p.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
+    t.put(p);
+    admin.flush(tableName);
+  }
 
   @Test(timeout = 15000)
   public void testShouldThrowIOExceptionIfStoreFileSizeIsEmptyAndSHouldSuccessfullyExecuteRollback()
       throws Exception {
@@ -695,9 +796,44 @@
   }
 
   public static class MockedSplitTransaction extends SplitTransaction {
+    private HRegion currentRegion;
 
     public MockedSplitTransaction(HRegion r, byte[] splitrow) {
       super(r, splitrow);
+      this.currentRegion = r;
     }
+
+    @Override
+    void transitionZKNode(Server server, RegionServerServices services, HRegion a, HRegion b)
+        throws IOException {
+      if (this.currentRegion.getRegionInfo().getTableNameAsString()
+          .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
+        try {
+          if (!secondSplit) {
+            callRollBack = true;
+            latch.await();
+          }
+        } catch (InterruptedException e) {
+        }
+
+      }
+      super.transitionZKNode(server, services, a, b);
+      if (this.currentRegion.getRegionInfo().getTableNameAsString()
+          .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
+        firstSplitCompleted = true;
+      }
+    }
+    @Override
+    public boolean rollback(Server server, RegionServerServices services) throws IOException {
+      if (this.currentRegion.getRegionInfo().getTableNameAsString()
+          .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
+        if (secondSplit) {
+          super.rollback(server, services);
+          latch.countDown();
+          return true;
+        }
+      }
+      return super.rollback(server, services);
+    }
 
   }
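
For reference, a condensed Java sketch (not part of the patch) of the split-start sequence as the first SplitTransaction.java hunk wires it up. All names are taken from the hunk above; the surrounding members (this.parent, this.journal, this.znodeVersion) and the helpers createNodeSplitting/transitionNodeSplitting are assumed to be the ones already defined in SplitTransaction.java:

    // Sketch of the patched flow, under the assumptions stated above.
    if (server != null && server.getZooKeeper() != null) {
      // Step 1: create the ephemeral RS_ZK_REGION_SPLITTING znode; it no longer
      // returns a version here.
      createNodeSplitting(server.getZooKeeper(),
        this.parent.getRegionInfo(), server.getServerName());
    }
    // Rollback of this journal entry now deletes the znode unconditionally.
    this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
    if (server != null && server.getZooKeeper() != null) {
      // Step 2: transition SPLITTING -> SPLITTING to pick up the znode version.
      // If an earlier rollback already removed the znode, this step fails and the
      // second split attempt aborts instead of proceeding without a valid znode.
      this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
          this.parent.getRegionInfo(), server.getServerName(), -1);
    }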