Make TestSplitTransactionOnCluster poll (100 x 100ms, i.e. up to 10s) for the
daughter regions and for the SPLIT znode to disappear, instead of checking once.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Leading whitespace, the position of blank
context lines and the <HRegion> type arguments are reconstructed - confirm
against the target revision; if a hunk is rejected, retry with `patch -l`.

Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java	(revision 1410060)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java	(working copy)
@@ -145,8 +145,7 @@
       // Now try splitting and it should work.
       split(hri, server, regionCount);
       // Get daughters
-      List<HRegion> daughters = cluster.getRegions(tableName);
-      assertTrue(daughters.size() >= 2);
+      List<HRegion> daughters = checkAndGetDaughters(tableName);
       // Assert the ephemeral node is up in zk.
       String path = ZKAssign.getNodeName(t.getConnection().getZooKeeperWatcher(),
         hri.getEncodedName());
@@ -174,7 +173,12 @@
         assertTrue(daughters.contains(r));
       }
       // Finally assert that the ephemeral SPLIT znode was cleaned up.
-      stats = t.getConnection().getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
+      for (int i=0; i<100; i++) {
+        // wait a bit (10s max) for the node to disappear
+        stats = t.getConnection().getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
+        if (stats == null) break;
+        Thread.sleep(100);
+      }
       LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
       assertTrue(stats == null);
     } finally {
@@ -228,8 +232,7 @@
       // Now try splitting and it should work.
       split(hri, server, regionCount);
       // Get daughters
-      List<HRegion> daughters = cluster.getRegions(tableName);
-      assertTrue(daughters.size() >= 2);
+      checkAndGetDaughters(tableName);
       // OK, so split happened after we cleared the blocking node.
     } finally {
       admin.setBalancerRunning(true, false);
@@ -271,8 +274,7 @@
       // Now split.
       split(hri, server, regionCount);
       // Get daughters
-      List<HRegion> daughters = cluster.getRegions(tableName);
-      assertTrue(daughters.size() >= 2);
+      List<HRegion> daughters = checkAndGetDaughters(tableName);
       // Remove one of the daughters from .META. to simulate failed insert of
       // daughter region up into .META.
       removeDaughterFromMeta(daughters.get(0).getRegionName());
@@ -328,8 +330,7 @@
       // Now split.
       split(hri, server, regionCount);
       // Get daughters
-      List<HRegion> daughters = cluster.getRegions(tableName);
-      assertTrue(daughters.size() >= 2);
+      List<HRegion> daughters = checkAndGetDaughters(tableName);
       // Now split one of the daughters.
       regionCount = server.getOnlineRegions().size();
       HRegionInfo daughter = daughters.get(0).getRegionInfo();
@@ -411,8 +412,7 @@
       // Now try splitting and it should work.
       split(hri, server, regionCount);
       // Get daughters
-      List<HRegion> daughters = cluster.getRegions(tableName);
-      assertTrue(daughters.size() >= 2);
+      checkAndGetDaughters(tableName);
       // Assert the ephemeral node is up in zk.
       String path = ZKAssign.getNodeName(t.getConnection()
           .getZooKeeperWatcher(), hri.getEncodedName());
@@ -489,8 +489,7 @@
 
       split(hri, server, regionCount);
       // Get daughters
-      List<HRegion> daughters = cluster.getRegions(tableName);
-      assertTrue(daughters.size() >= 2);
+      checkAndGetDaughters(tableName);
       // Assert the ephemeral node is up in zk.
       String path = ZKAssign.getNodeName(t.getConnection()
           .getZooKeeperWatcher(), hri.getEncodedName());
@@ -836,7 +835,28 @@
     }
 
   }
-  
+
+  /**
+   * Polls {@code cluster.getRegions(tableName)} every 100ms, for up to 10s,
+   * until it returns at least two regions, then asserts that it did.
+   *
+   * @param tableName table whose regions are fetched from the cluster
+   * @return the last region list fetched; it holds at least two regions
+   * @throws InterruptedException if interrupted while sleeping between polls
+   */
+  private List<HRegion> checkAndGetDaughters(byte[] tableName)
+      throws InterruptedException {
+    List<HRegion> daughters = null;
+    // try up to 10s
+    for (int i=0; i<100; i++) {
+      daughters = cluster.getRegions(tableName);
+      if (daughters.size() >= 2) break;
+      Thread.sleep(100);
+    }
+    assertTrue(daughters.size() >= 2);
+    return daughters;
+  }
+
   private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
   throws IOException, InterruptedException {
     cluster.abortMaster(0);