diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index fc80d9c..3d12baf 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.NavigableMap;
 import java.util.Set;
 import java.util.TreeMap;
@@ -170,6 +171,13 @@ public class AssignmentManager extends ZooKeeperListener {
   private final Map<String, PairOfSameType<HRegionInfo>> mergingRegions
     = new HashMap<String, PairOfSameType<HRegionInfo>>();
 
+  /**
+   * Parent region of an in-flight split mapped to its two daughter regions, kept so the
+   * daughters can be taken offline again if the parent comes back online (split rollback).
+   */
+  private final Map<HRegionInfo, PairOfSameType<HRegionInfo>> splitRegions
+    = new HashMap<HRegionInfo, PairOfSameType<HRegionInfo>>();
+
   /**
    * The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
    * failure due to lack of availability of region plan or bad region plan
@@ -1308,14 +1316,36 @@ public class AssignmentManager extends ZooKeeperListener {
             boolean disabled = getZKTable().isDisablingOrDisabledTable(regionInfo.getTable());
             ServerName serverName = rs.getServerName();
             if (serverManager.isServerOnline(serverName)) {
-              if (rs.isOnServer(serverName)
-                  && (rs.isOpened() || rs.isSplitting())) {
-                regionOnline(regionInfo, serverName);
-                if (disabled) {
-                  // if server is offline, no hurt to unassign again
-                  LOG.info("Opened " + regionNameStr
-                    + "but this table is disabled, triggering close of region");
-                  unassign(regionInfo);
+              if (rs.isOnServer(serverName) && (rs.isOpened() || rs.isSplitting())) {
+                synchronized (regionStates) {
+                  regionOnline(regionInfo, serverName);
+                  if (rs.isSplitting()) {
+                    // A parent that is back online while SPLITTING means the split was
+                    // rolled back: offline the daughter regions recorded for it. The pair
+                    // is only recorded where this class handles the start of a split, so
+                    // it can be absent; skip the cleanup then instead of hitting a null.
+                    PairOfSameType<HRegionInfo> daughters = splitRegions.remove(regionInfo);
+                    if (daughters != null) {
+                      HRegionInfo hri_a = daughters.getFirst();
+                      HRegionInfo hri_b = daughters.getSecond();
+                      if (!regionStates.isRegionInTransition(hri_a.getEncodedName())) {
+                        LOG.warn("Split daughter region not in transition " + hri_a);
+                      }
+                      if (!regionStates.isRegionInTransition(hri_b.getEncodedName())) {
+                        LOG.warn("Split daughter region not in transition " + hri_b);
+                      }
+                      regionOffline(hri_a);
+                      regionOffline(hri_b);
+                    } else {
+                      LOG.warn("No split daughters recorded for " + regionNameStr);
+                    }
+                  }
+                  if (disabled) {
+                    // if server is offline, no hurt to unassign again
+                    LOG.info("Opened " + regionNameStr
+                      + "but this table is disabled, triggering close of region");
+                    unassign(regionInfo);
+                  }
                 }
               } else if (rs.isMergingNew()) {
                 synchronized (regionStates) {
@@ -3980,6 +4010,7 @@ public class AssignmentManager extends ZooKeeperListener {
     }
 
     synchronized (regionStates) {
+      splitRegions.put(p, new PairOfSameType<HRegionInfo>(hri_a, hri_b));
       regionStates.updateRegionState(hri_a, State.SPLITTING_NEW, sn);
       regionStates.updateRegionState(hri_b, State.SPLITTING_NEW, sn);
       regionStates.updateRegionState(rt, State.SPLITTING);
@@ -3995,6 +4026,7 @@ public class AssignmentManager extends ZooKeeperListener {
         regionOffline(p, State.SPLIT);
         regionOnline(hri_a, sn);
         regionOnline(hri_b, sn);
+        splitRegions.remove(p);
       }
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index e6b9725..b6744e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -1162,6 +1162,40 @@ public class TestSplitTransactionOnCluster {
     return(null);
   }
 
+  @Test
+  public void testFailedSplit() throws Exception {
+    TableName tableName = TableName.valueOf("testFailedSplit");
+    byte[] colFamily = Bytes.toBytes("info");
+    TESTING_UTIL.createTable(tableName, colFamily);
+    HTable table = new HTable(TESTING_UTIL.getConfiguration(), tableName);
+    try {
+      TESTING_UTIL.loadTable(table, colFamily);
+      List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
+      assertTrue(regions.size() == 1);
+      final HRegion actualRegion = cluster.getRegions(tableName).get(0);
+      actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
+        Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
+
+      // The following split would fail.
+      admin.split(tableName.getNameAsString());
+      FailingSplitRegionObserver.latch.await();
+      LOG.info("Waiting for region to come out of RIT");
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
+          Map<String, RegionState> rit = regionStates.getRegionsInTransition();
+          return !rit.containsKey(actualRegion.getRegionInfo().getEncodedName());
+        }
+      });
+      regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
+      assertTrue(regions.size() == 1);
+    } finally {
+      table.close();
+      TESTING_UTIL.deleteTable(tableName);
+    }
+  }
+
   private List<HRegion> checkAndGetDaughters(byte[] tableName)
       throws InterruptedException {
     List<HRegion> daughters = null;