diff --git src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java index 780ed0a..cbb6d48 100644 --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.PairOfSameType; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.ipc.RemoteException; @@ -402,6 +403,8 @@ public class MetaReader { return Writables.getHRegionInfoOrNull(bytes); } + + /** * Checks if the specified table exists. Looks at the META table hosted on * the specified server. @@ -450,6 +453,21 @@ public class MetaReader { } /** + * Returns the daughter regions by reading the corresponding columns of the catalog table + * Result. + * @param data a Result object from the catalog table scan + * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split + * parent + */ + public static PairOfSameType getDaughterRegions(Result data) throws IOException { + HRegionInfo splitA = Writables.getHRegionInfoOrNull(data.getValue(HConstants.CATALOG_FAMILY, + HConstants.SPLITA_QUALIFIER)); + HRegionInfo splitB = Writables.getHRegionInfoOrNull(data.getValue(HConstants.CATALOG_FAMILY, + HConstants.SPLITB_QUALIFIER)); + return new PairOfSameType(splitA, splitB); + } + + /** * Gets all of the regions of the specified table. * @param catalogTracker * @param tableName diff --git src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 674ac1a..cf6e03b 100644 --- src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -236,7 +236,7 @@ public class HBaseFsck extends Configured implements Tool { * When initially looking at HDFS, we attempt to find any orphaned data. */ private List orphanHdfsDirs = Collections.synchronizedList(new ArrayList()); - + private Map> orphanTableDirs = new HashMap>(); /** @@ -395,7 +395,7 @@ public class HBaseFsck extends Configured implements Tool { if (!checkMetaOnly) { reportTablesInFlux(); } - + // get regions according to what is online on each RegionServer loadDeployedRegions(); @@ -800,19 +800,21 @@ public class HBaseFsck extends Configured implements Tool { if (!orphanTableDirs.containsKey(tableName)) { LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe); //should only report once for each table - errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, + errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName); Set columns = new HashSet(); orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi)); } } } - modTInfo.addRegionInfo(hbi); + if (!hbi.isSkipChecks()) { + modTInfo.addRegionInfo(hbi); + } } return tablesInfo; } - + /** * To get the column family list according to the column family dirs * @param columns @@ -830,7 +832,7 @@ public class HBaseFsck extends Configured implements Tool { } return columns; } - + /** * To fabricate a .tableinfo file with following contents
* 1. the correct tablename
@@ -848,7 +850,7 @@ public class HBaseFsck extends Configured implements Tool { FSTableDescriptors.createTableDescriptor(htd, getConf(), true); return true; } - + /** * To fix orphan table by creating a .tableinfo file under tableDir
* 1. if TableInfo is cached, to recover the .tableinfo accordingly
@@ -1661,6 +1663,18 @@ public class HBaseFsck extends Configured implements Tool { // ========== Cases where the region is in META ============= } else if (inMeta && inHdfs && !isDeployed && splitParent) { + // check whether this is an actual error, or just transient state where parent + // is not cleaned + if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) { + // check that split daughters are there + HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName()); + HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName()); + if (infoA != null && infoB != null) { + // we already processed or will process daughters. Move on, nothing to see here. + hbi.setSkipChecks(true); + return; + } + } errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region " + descriptiveName + " is a split parent in META, in HDFS, " + "and not deployed on any region server. This could be transient."); @@ -1791,7 +1805,9 @@ public class HBaseFsck extends Configured implements Tool { modTInfo.addServer(server); } - modTInfo.addRegionInfo(hbi); + if (!hbi.isSkipChecks()) { + modTInfo.addRegionInfo(hbi); + } tablesInfo.put(tableName, modTInfo); } @@ -2555,7 +2571,8 @@ public class HBaseFsck extends Configured implements Tool { || hri.isMetaRegion() || hri.isRootRegion())) { return true; } - MetaEntry m = new MetaEntry(hri, sn, ts); + PairOfSameType daughters = MetaReader.getDaughterRegions(result); + MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond()); HbckInfo hbInfo = new HbckInfo(m); HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo); if (previous != null) { @@ -2594,11 +2611,19 @@ public class HBaseFsck extends Configured implements Tool { static class MetaEntry extends HRegionInfo { ServerName regionServer; // server hosting this region long modTime; // timestamp of most recent modification metadata + HRegionInfo splitA, splitB; //split daughters public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) { + this(rinfo, regionServer, modTime, null, null); + } + + public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime, + HRegionInfo splitA, HRegionInfo splitB) { super(rinfo); this.regionServer = regionServer; this.modTime = modTime; + this.splitA = splitA; + this.splitB = splitB; } public boolean equals(Object o) { @@ -2647,6 +2672,7 @@ public class HBaseFsck extends Configured implements Tool { private HdfsEntry hdfsEntry = null; // info in HDFS private List deployedEntries = Lists.newArrayList(); // on Region Server private List deployedOn = Lists.newArrayList(); // info on RS's + private boolean skipChecks = false; // whether to skip further checks to this region info. HbckInfo(MetaEntry metaEntry) { this.metaEntry = metaEntry; @@ -2764,6 +2790,14 @@ public class HBaseFsck extends Configured implements Tool { } return hdfsEntry.hri; } + + public void setSkipChecks(boolean skipChecks) { + this.skipChecks = skipChecks; + } + + public boolean isSkipChecks() { + return skipChecks; + } } final static Comparator cmp = new Comparator() { @@ -3239,15 +3273,15 @@ public class HBaseFsck extends Configured implements Tool { boolean shouldFixHdfsHoles() { return fixHdfsHoles; } - + public void setFixTableOrphans(boolean shouldFix) { fixTableOrphans = shouldFix; } - + boolean shouldFixTableOrphans() { return fixTableOrphans; } - + public void setFixHdfsOverlaps(boolean shouldFix) { fixHdfsOverlaps = shouldFix; } diff --git src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java index 34e9701..4e62a1f 100644 --- src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; @@ -199,7 +200,6 @@ public class TestEndToEndSplitTransaction { HTable table; byte[] tableName, family; HBaseAdmin admin; - HTable metaTable; HRegionServer rs; RegionSplitter(HTable table) throws IOException { @@ -208,7 +208,6 @@ public class TestEndToEndSplitTransaction { this.family = table.getTableDescriptor().getFamiliesKeys().iterator().next(); admin = TEST_UTIL.getHBaseAdmin(); rs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0); - metaTable = new HTable(conf, HConstants.META_TABLE_NAME); } public void run() { @@ -239,14 +238,14 @@ public class TestEndToEndSplitTransaction { addData(start); addData(mid); - flushAndBlockUntilDone(region.getRegionName()); - compactAndBlockUntilDone(region.getRegionName()); + flushAndBlockUntilDone(admin, rs, region.getRegionName()); + compactAndBlockUntilDone(admin, rs, region.getRegionName()); log("Initiating region split for:" + region.getRegionNameAsString()); try { admin.split(region.getRegionName(), splitPoint); //wait until the split is complete - blockUntilRegionSplit(50000, region.getRegionName(), true); + blockUntilRegionSplit(conf, 50000, region.getRegionName(), true); } catch (NotServingRegionException ex) { //ignore @@ -254,10 +253,6 @@ public class TestEndToEndSplitTransaction { } } catch (Throwable ex) { this.ex = ex; - } finally { - if (metaTable != null) { - IOUtils.closeQuietly(metaTable); - } } } @@ -270,106 +265,6 @@ public class TestEndToEndSplitTransaction { } table.flushCommits(); } - - void flushAndBlockUntilDone(byte[] regionName) throws IOException, InterruptedException { - log("flushing region: " + Bytes.toStringBinary(regionName)); - admin.flush(regionName); - log("blocking until flush is complete: " + Bytes.toStringBinary(regionName)); - Threads.sleepWithoutInterrupt(500); - while (rs.cacheFlusher.getFlushQueueSize() > 0) { - Threads.sleep(50); - } - } - - void compactAndBlockUntilDone(byte[] regionName) throws IOException, - InterruptedException { - log("Compacting region: " + Bytes.toStringBinary(regionName)); - admin.majorCompact(regionName); - log("blocking until compaction is complete: " + Bytes.toStringBinary(regionName)); - Threads.sleepWithoutInterrupt(500); - while (rs.compactSplitThread.getCompactionQueueSize() > 0) { - Threads.sleep(50); - } - } - - /** bloks until the region split is complete in META and region server opens the daughters */ - void blockUntilRegionSplit(long timeout, final byte[] regionName, boolean waitForDaughters) - throws IOException, InterruptedException { - long start = System.currentTimeMillis(); - log("blocking until region is split:" + Bytes.toStringBinary(regionName)); - HRegionInfo daughterA = null, daughterB = null; - - while (System.currentTimeMillis() - start < timeout) { - Result result = getRegionRow(regionName); - if (result == null) { - break; - } - - HRegionInfo region = MetaEditor.getHRegionInfo(result); - if(region.isSplitParent()) { - log("found parent region: " + region.toString()); - PairOfSameType pair = MetaEditor.getDaughterRegions(result); - daughterA = pair.getFirst(); - daughterB = pair.getSecond(); - break; - } - sleep(100); - } - - //if we are here, this means the region split is complete or timed out - if (waitForDaughters) { - long rem = timeout - (System.currentTimeMillis() - start); - blockUntilRegionIsInMeta(rem, daughterA.getRegionName()); - - rem = timeout - (System.currentTimeMillis() - start); - blockUntilRegionIsInMeta(rem, daughterB.getRegionName()); - - rem = timeout - (System.currentTimeMillis() - start); - blockUntilRegionIsOpenedByRS(rem, daughterA.getRegionName()); - - rem = timeout - (System.currentTimeMillis() - start); - blockUntilRegionIsOpenedByRS(rem, daughterB.getRegionName()); - } - } - - Result getRegionRow(byte[] regionName) throws IOException { - Get get = new Get(regionName); - return metaTable.get(get); - } - - void blockUntilRegionIsInMeta(long timeout, byte[] regionName) - throws IOException, InterruptedException { - log("blocking until region is in META: " + Bytes.toStringBinary(regionName)); - long start = System.currentTimeMillis(); - while (System.currentTimeMillis() - start < timeout) { - Result result = getRegionRow(regionName); - if (result != null) { - HRegionInfo info = MetaEditor.getHRegionInfo(result); - if (info != null && !info.isOffline()) { - log("found region in META: " + Bytes.toStringBinary(regionName)); - break; - } - } - sleep(10); - } - } - - void blockUntilRegionIsOpenedByRS(long timeout, byte[] regionName) - throws IOException, InterruptedException { - log("blocking until region is opened by region server: " + Bytes.toStringBinary(regionName)); - long start = System.currentTimeMillis(); - while (System.currentTimeMillis() - start < timeout) { - List regions = rs.getOnlineRegions(tableName); - for (HRegion region : regions) { - if (Bytes.compareTo(region.getRegionName(), regionName) == 0) { - log("found region open in RS: " + Bytes.toStringBinary(regionName)); - return; - } - } - sleep(10); - } - } - } /** @@ -476,6 +371,120 @@ public class TestEndToEndSplitTransaction { LOG.info(msg); } + /* some utility methods for split tests */ + + public static void flushAndBlockUntilDone(HBaseAdmin admin, HRegionServer rs, byte[] regionName) + throws IOException, InterruptedException { + log("flushing region: " + Bytes.toStringBinary(regionName)); + admin.flush(regionName); + log("blocking until flush is complete: " + Bytes.toStringBinary(regionName)); + Threads.sleepWithoutInterrupt(500); + while (rs.cacheFlusher.getFlushQueueSize() > 0) { + Threads.sleep(50); + } + } + + public static void compactAndBlockUntilDone(HBaseAdmin admin, HRegionServer rs, byte[] regionName) + throws IOException, InterruptedException { + log("Compacting region: " + Bytes.toStringBinary(regionName)); + admin.majorCompact(regionName); + log("blocking until compaction is complete: " + Bytes.toStringBinary(regionName)); + Threads.sleepWithoutInterrupt(500); + while (rs.compactSplitThread.getCompactionQueueSize() > 0) { + Threads.sleep(50); + } + } + + /** Blocks until the region split is complete in META and region server opens the daughters */ + public static void blockUntilRegionSplit(Configuration conf, long timeout, + final byte[] regionName, boolean waitForDaughters) + throws IOException, InterruptedException { + long start = System.currentTimeMillis(); + log("blocking until region is split:" + Bytes.toStringBinary(regionName)); + HRegionInfo daughterA = null, daughterB = null; + HTable metaTable = new HTable(conf, HConstants.META_TABLE_NAME); + + try { + while (System.currentTimeMillis() - start < timeout) { + Result result = getRegionRow(metaTable, regionName); + if (result == null) { + break; + } + + HRegionInfo region = MetaReader.parseCatalogResult(result).getFirst(); + if(region.isSplitParent()) { + log("found parent region: " + region.toString()); + PairOfSameType pair = MetaReader.getDaughterRegions(result); + daughterA = pair.getFirst(); + daughterB = pair.getSecond(); + break; + } + Threads.sleep(100); + } + + //if we are here, this means the region split is complete or timed out + if (waitForDaughters) { + long rem = timeout - (System.currentTimeMillis() - start); + blockUntilRegionIsInMeta(metaTable, rem, daughterA); + + rem = timeout - (System.currentTimeMillis() - start); + blockUntilRegionIsInMeta(metaTable, rem, daughterB); + + rem = timeout - (System.currentTimeMillis() - start); + blockUntilRegionIsOpened(conf, rem, daughterA); + + rem = timeout - (System.currentTimeMillis() - start); + blockUntilRegionIsOpened(conf, rem, daughterB); + } + } finally { + IOUtils.closeQuietly(metaTable); + } + } + + public static Result getRegionRow(HTable metaTable, byte[] regionName) throws IOException { + Get get = new Get(regionName); + return metaTable.get(get); + } + + public static void blockUntilRegionIsInMeta(HTable metaTable, long timeout, HRegionInfo hri) + throws IOException, InterruptedException { + log("blocking until region is in META: " + hri.getRegionNameAsString()); + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < timeout) { + Result result = getRegionRow(metaTable, hri.getRegionName()); + if (result != null) { + HRegionInfo info = MetaReader.parseCatalogResult(result).getFirst(); + if (info != null && !info.isOffline()) { + log("found region in META: " + hri.getRegionNameAsString()); + break; + } + } + Threads.sleep(10); + } + } + + public static void blockUntilRegionIsOpened(Configuration conf, long timeout, HRegionInfo hri) + throws IOException, InterruptedException { + log("blocking until region is opened for reading:" + hri.getRegionNameAsString()); + long start = System.currentTimeMillis(); + HTable table = new HTable(conf, hri.getTableName()); + + try { + Get get = new Get(hri.getStartKey()); + while (System.currentTimeMillis() - start < timeout) { + try { + table.get(get); + break; + } catch(IOException ex) { + //wait some more + } + Threads.sleep(10); + } + } finally { + IOUtils.closeQuietly(table); + } + } + @org.junit.Rule public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); diff --git src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 3771d9a..84d8db8 100644 --- src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -38,6 +38,7 @@ import java.util.Map.Entry; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; +import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -54,6 +55,7 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; @@ -70,11 +72,12 @@ import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter; -import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter; -import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo; +import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter; +import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo; import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil; import org.apache.hadoop.hbase.zookeeper.ZKAssign; @@ -409,7 +412,7 @@ public class TestHBaseFsck { deleteTable(table); } } - + @Test public void testHbckFixOrphanTable() throws Exception { String table = "tableInfo"; @@ -418,31 +421,31 @@ public class TestHBaseFsck { try { setupTable(table); HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); - + Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table ); fs = hbaseTableDir.getFileSystem(conf); FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); tableinfo = status.getPath(); fs.rename(tableinfo, new Path("/.tableinfo")); - + //to report error if .tableinfo is missing. - HBaseFsck hbck = doFsck(conf, false); + HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE }); - + // fix OrphanTable with default .tableinfo (htd not yet cached on master) hbck = doFsck(conf, true); assertNoErrors(hbck); status = null; status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); assertNotNull(status); - + HTableDescriptor htd = admin.getTableDescriptor(table.getBytes()); htd.setValue("NOT_DEFAULT", "true"); admin.disableTable(table); admin.modifyTable(table.getBytes(), htd); admin.enableTable(table); fs.delete(status.getPath(), true); - + // fix OrphanTable with cache htd = admin.getTableDescriptor(table.getBytes()); // warms up cached htd on master hbck = doFsck(conf, true); @@ -1185,6 +1188,7 @@ public class TestHBaseFsck { @Test public void testLingeringSplitParent() throws Exception { String table = "testLingeringSplitParent"; + HTable meta = null; try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); @@ -1198,7 +1202,7 @@ public class TestHBaseFsck { Bytes.toBytes("C"), true, true, false); // Create a new meta entry to fake it as a split parent. - HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName()); + meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName()); HRegionInfo hri = location.getRegionInfo(); HRegionInfo a = new HRegionInfo(tbl.getTableName(), @@ -1256,6 +1260,119 @@ public class TestHBaseFsck { assertEquals(ROWKEYS.length, countRows()); } finally { deleteTable(table); + IOUtils.closeQuietly(meta); + } + } + + /** + * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for + * valid cases where the daughters are there. + */ + @Test + public void testValidLingeringSplitParent() throws Exception { + String table = "testLingeringSplitParent"; + HTable meta = null; + try { + setupTable(table); + assertEquals(ROWKEYS.length, countRows()); + + // make sure data in regions, if in hlog only there is no data loss + TEST_UTIL.getHBaseAdmin().flush(table); + HRegionLocation location = tbl.getRegionLocation("B"); + + meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName()); + HRegionInfo hri = location.getRegionInfo(); + + // do a regular split + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + byte[] regionName = location.getRegionInfo().getRegionName(); + admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); + TestEndToEndSplitTransaction.blockUntilRegionSplit( + TEST_UTIL.getConfiguration(), 60000, regionName, true); + + // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on + // for some time until children references are deleted. HBCK erroneously sees this as + // overlapping regions + HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, null); + assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported + + // assert that the split META entry is still there. + Get get = new Get(hri.getRegionName()); + Result result = meta.get(get); + assertNotNull(result); + assertNotNull(MetaReader.parseCatalogResult(result).getFirst()); + + assertEquals(ROWKEYS.length, countRows()); + + // assert that we still have the split regions + assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split. + assertNoErrors(doFsck(conf, false)); + } finally { + deleteTable(table); + IOUtils.closeQuietly(meta); + } + } + + /** + * Split crashed after write to META finished for the parent region, but + * failed to write daughters (pre HBASE-7721 codebase) + */ + @Test + public void testSplitDaughtersNotInMeta() throws Exception { + String table = "testSplitdaughtersNotInMeta"; + HTable meta = null; + try { + setupTable(table); + assertEquals(ROWKEYS.length, countRows()); + + // make sure data in regions, if in hlog only there is no data loss + TEST_UTIL.getHBaseAdmin().flush(table); + HRegionLocation location = tbl.getRegionLocation("B"); + + meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName()); + HRegionInfo hri = location.getRegionInfo(); + + // do a regular split + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + byte[] regionName = location.getRegionInfo().getRegionName(); + admin.split(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); + TestEndToEndSplitTransaction.blockUntilRegionSplit( + TEST_UTIL.getConfiguration(), 60000, regionName, true); + + PairOfSameType daughters = MetaReader.getDaughterRegions(meta.get(new Get(regionName))); + + // Delete daughter regions from meta, but not hdfs, unassign it. + Map hris = tbl.getRegionLocations(); + undeployRegion(admin, hris.get(daughters.getFirst()), daughters.getFirst()); + undeployRegion(admin, hris.get(daughters.getSecond()), daughters.getSecond()); + + meta.delete(new Delete(daughters.getFirst().getRegionName())); + meta.delete(new Delete(daughters.getSecond().getRegionName())); + meta.flushCommits(); + + HBaseFsck hbck = doFsck(conf, false); + assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); //no LINGERING_SPLIT_PARENT + + // now fix it. The fix should not revert the region split, but add daughters to META + hbck = doFsck(conf, true, true, false, false, false, false, false, false, null); + assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); + + // assert that the split META entry is still there. + Get get = new Get(hri.getRegionName()); + Result result = meta.get(get); + assertNotNull(result); + assertNotNull(MetaReader.parseCatalogResult(result).getFirst()); + + assertEquals(ROWKEYS.length, countRows()); + + // assert that we still have the split regions + assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions pre-split. + assertNoErrors(doFsck(conf, false)); //should be fixed by now + } finally { + deleteTable(table); + IOUtils.closeQuietly(meta); } }