diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
index 54734dd..d7afe71 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
@@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.HFileLink;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.security.AccessDeniedException;
@@ -1530,6 +1531,18 @@ public abstract class FSUtils {
     }
   }
 
+  /**
+   * Filter for HFileLinks (StoreFiles and HFiles not included).
+   * The filter itself does not check whether the path is a file; it only matches the link name pattern.
+   */
+  public static class HFileLinkFilter implements PathFilter {
+
+    @Override
+    public boolean accept(Path p) {
+      return HFileLink.isHFileLink(p);
+    }
+  }
+
   public static class ReferenceFileFilter extends AbstractFileStatusFilter {
 
     private final FileSystem fs;
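
For illustration only (not part of the patch): since HFileLinkFilter is a plain PathFilter, it plugs into the standard listing APIs. A minimal sketch of enumerating link files in a column family directory is below; the famDir path and the Configuration variable conf are hypothetical.

    // Assumes: org.apache.hadoop.conf.Configuration conf; imports from org.apache.hadoop.fs.
    // List HFileLink files under a (hypothetical) column family directory,
    // using the HFileLinkFilter introduced above.
    FileSystem fs = FileSystem.get(conf);
    Path famDir = new Path("/hbase/data/default/t1/0123456789abcdef0123456789abcdef/f1");
    for (FileStatus stat : fs.listStatus(famDir, new FSUtils.HFileLinkFilter())) {
      System.out.println("HFileLink: " + stat.getPath());
    }
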
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 6bd3f87..e11bacf 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -105,6 +105,8 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.RowMutations;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.FileLink;
+import org.apache.hadoop.hbase.io.HFileLink;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -252,6 +254,7 @@ public class HBaseFsck extends Configured implements Closeable {
   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
   private boolean fixSplitParents = false; // fix lingering split parents
   private boolean fixReferenceFiles = false; // fix lingering reference store file
+  private boolean fixHFileLinks = false; // fix lingering HFileLinks
   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
   private boolean fixTableLocks = false; // fix table locks which are expired
   private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
@@ -762,6 +765,7 @@ public class HBaseFsck extends Configured implements Closeable {
     // Do offline check and repair first
     offlineHdfsIntegrityRepair();
     offlineReferenceFileRepair();
+    offlineHLinkFileRepair();
     // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
     // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
     // is better to set Master into maintenance mode during online hbck.
@@ -1128,6 +1132,73 @@
   }
 
   /**
+   * Scan all the store file names to find any lingering HFileLink files,
+   * which refer to non-existing files. If the "fix" option is enabled,
+   * any lingering HFileLink file that is found will be sidelined.
+   */
+  private void offlineHLinkFileRepair() throws IOException, InterruptedException {
+    Configuration conf = getConf();
+    Path hbaseRoot = FSUtils.getRootDir(conf);
+    FileSystem fs = hbaseRoot.getFileSystem(conf);
+    LOG.info("Computing mapping of all link files");
+    Map<String, Path> allFiles = FSUtils
+        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
+    errors.print("");
+
+    LOG.info("Validating mapping using HDFS state");
+    for (Path path : allFiles.values()) {
+      // Build an HFileLink object to gather locations
+      HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
+      if (actualLink.exists(fs)) continue; // good, expected
+
+      // Found a lingering HFileLink
+      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
+      if (!shouldFixHFileLinks()) continue;
+
+      // Now try to fix it, since the fix was requested
+      setShouldRerun();
+
+      // An HFileLink path should look like
+      // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
+      // Sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
+      boolean success = sidelineFile(fs, hbaseRoot, path);
+
+      if (!success) {
+        LOG.error("Failed to sideline HFileLink file " + path);
+      }
+
+      // An HFileLink backreference path should look like
+      // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
+      // Sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
+      Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
+              .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
+                  HFileLink.getReferencedRegionName(path.getName().toString()),
+                  path.getParent().getName()),
+          HFileLink.getReferencedHFileName(path.getName().toString()));
+      success = sidelineFile(fs, hbaseRoot, backRefPath);
+
+      if (!success) {
+        LOG.error("Failed to sideline HFileLink backreference file " + path);
+      }
+    }
+  }
+
+  private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
+    URI uri = hbaseRoot.toUri().relativize(path.toUri());
+    if (uri.isAbsolute()) return false;
+    String relativePath = uri.getPath();
+    Path rootDir = getSidelineDir();
+    Path dst = new Path(rootDir, relativePath);
+    boolean pathCreated = fs.mkdirs(dst.getParent());
+    if (!pathCreated) {
+      LOG.error("Failed to create path: " + dst.getParent());
+      return false;
+    }
+    LOG.info("Trying to sideline file " + path + " to " + dst);
+    return fs.rename(path, dst);
+  }
+
+  /**
    * TODO -- need to add tests for this.
    */
   private void reportEmptyMetaCells() {
@@ -3892,8 +3963,8 @@ public class HBaseFsck extends Configured implements Closeable {
       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
-      WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR,
-      UNDELETED_REPLICATION_QUEUE
+      LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK,
+      ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR, UNDELETED_REPLICATION_QUEUE
     }
     void clear();
     void report(String message);
@@ -4471,6 +4542,15 @@
     return fixReferenceFiles;
   }
 
+  public void setFixHFileLinks(boolean shouldFix) {
+    fixHFileLinks = shouldFix;
+    fixAny |= shouldFix;
+  }
+
+  boolean shouldFixHFileLinks() {
+    return fixHFileLinks;
+  }
+
   public boolean shouldIgnorePreCheckPermission() {
     return !fixAny || ignorePreCheckPermission;
   }
@@ -4587,6 +4667,7 @@
     out.println("   -fixSplitParents  Try to force offline split parents to be online.");
     out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
     out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
+    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
     out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
         + " (empty REGIONINFO_QUALIFIER rows)");
 
@@ -4599,7 +4680,8 @@
     out.println("  Metadata Repair shortcuts");
     out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
         "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
-        "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
+        "-fixReferenceFiles -fixHFileLinks -fixTableLocks -fixOrphanedTableZnodes");
+
     out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
     out.println("");
@@ -4733,6 +4815,8 @@
         sidelineCorruptHFiles = true;
       } else if (cmd.equals("-fixReferenceFiles")) {
         setFixReferenceFiles(true);
+      } else if (cmd.equals("-fixHFileLinks")) {
+        setFixHFileLinks(true);
       } else if (cmd.equals("-fixEmptyMetaCells")) {
         setFixEmptyMetaCells(true);
       } else if (cmd.equals("-repair")) {
@@ -4748,6 +4832,7 @@
         setFixSplitParents(false);
         setCheckHdfs(true);
         setFixReferenceFiles(true);
+        setFixHFileLinks(true);
        setFixTableLocks(true);
        setFixTableZNodes(true);
      } else if (cmd.equals("-repairHoles")) {
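
A note on the naming convention the repair relies on, with an illustrative sketch (table, region and hfile names below are made up): a link file is named after the table, region and hfile it points to, and HFileLink.buildFromHFileLinkPattern() resolves the candidate target locations that offlineHLinkFileRepair() probes with exists().

    // Sketch: what a link name looks like and how the repair validates it.
    // "sourceTable", the region hash and the hfile name are hypothetical values.
    String linkName = HFileLink.createHFileLinkName(
        TableName.valueOf("sourceTable"), "0123456789abcdef0123456789abcdef", "abcdef0123456789");
    // linkName has a form like "sourceTable=0123456789abcdef0123456789abcdef-abcdef0123456789"
    Path linkPath = new Path(famDir, linkName);        // famDir as in the earlier sketch
    HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, linkPath);
    boolean dangling = !link.exists(fs);               // true is what hbck reports as LINGERING_HFILELINK
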
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
index 2a3e7f4..e220977 100644
--- hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
@@ -65,6 +65,10 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableExistsException;
+import org.apache.hadoop.hbase.io.HFileLink;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.ServerName;
@@ -1736,7 +1740,7 @@
     // for some time until children references are deleted. HBCK erroneously sees this as
     // overlapping regions
     HBaseFsck hbck = doFsck(
-        conf, true, true, false, false, false, true, true, true, false, false, false, false, null);
+        conf, true, true, false, false, false, true, true, true, false, false, false, false, false, null);
     assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
 
     // assert that the split hbase:meta entry is still there.
@@ -1809,7 +1813,7 @@
 
     // now fix it. The fix should not revert the region split, but add daughters to META
     hbck = doFsck(
-        conf, true, true, false, false, false, false, false, false, false, false, false,false,null);
+        conf, true, true, false, false, false, false, false, false, false, false, false, false,false,null);
     assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
         ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN });
@@ -2362,6 +2366,97 @@
   }
 
   /**
+   * Test fixing lingering HFileLinks.
+   */
+  @Test(timeout = 180000)
+  public void testLingeringHFileLinks() throws Exception {
+    TableName table = TableName.valueOf("testLingeringHFileLinks");
+    try {
+      setupTable(table);
+
+      FileSystem fs = FileSystem.get(conf);
+      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
+      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
+      String regionName = regionDir.getName();
+      Path famDir = new Path(regionDir, FAM_STR);
+      String HFILE_NAME = "01234567abcd";
+      Path hFilePath = new Path(famDir, HFILE_NAME);
+
+      // creating an HFile
+      HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
+      HFile.Writer w =
+          HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context)
+              .create();
+      w.close();
+
+      HFileLink.create(conf, fs, famDir, table, regionName, HFILE_NAME);
+
+      // should report no error
+      HBaseFsck hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+
+      // Delete the linked file
+      fs.delete(hFilePath, true);
+
+      // Check without fix should show the error
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
+
+      // Fixing the error
+      hbck = doFsck(conf, true);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
+
+      // Fix should sideline these files, thus preventing the error
+      hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  @Test(timeout = 180000)
+  public void testCorruptLinkDirectory() throws Exception {
+    TableName table = TableName.valueOf("testLingeringHFileLinks");
+    try {
+      setupTable(table);
+      FileSystem fs = FileSystem.get(conf);
+
+      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
+      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
+      Path famDir = new Path(regionDir, FAM_STR);
+      String regionName = regionDir.getName();
+      String HFILE_NAME = "01234567abcd";
+      String link = HFileLink.createHFileLinkName(table, regionName, HFILE_NAME);
+
+      // should report no error
+      HBaseFsck hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+
+      // creating a directory with a file in it instead of the HFileLink file
+      fs.mkdirs(new Path(famDir, link));
+      fs.create(new Path(new Path(famDir, link), "somefile"));
+
+      // Check without fix should show the error
+      hbck = doFsck(conf, false);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
+
+      // Fixing the error
+      hbck = doFsck(conf, true);
+      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
+
+      // Fix should sideline these files, thus preventing the error
+      hbck = doFsck(conf, false);
+      assertNoErrors(hbck);
+    } finally {
+      cleanupTable(table);
+    }
+  }
+
+  /**
    * Test mission REGIONINFO_QUALIFIER in hbase:meta
    */
   @Test (timeout=180000)
@@ -2843,7 +2938,7 @@
       // fix hole
       assertErrors(
         doFsck(
-          conf, false, true, false, false, false, false, false, false, false, false, false,
+          conf, false, true, false, false, false, false, false, false, false, false, false, false,
           false, null),
         new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
           ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
index 09f6978..1807add 100644
--- hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
@@ -40,14 +40,14 @@ public class HbckTestingUtil {
   public static HBaseFsck doFsck(
       Configuration conf, boolean fix, TableName table) throws Exception {
-    return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
+    return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
   }
 
   public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta,
       boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans,
       boolean fixTableOrphans, boolean fixVersionFile,
-      boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks,
-      boolean fixTableZnodes, Boolean fixReplication,
+      boolean fixReferenceFiles, boolean fixHFileLinks, boolean fixEmptyMetaRegionInfo,
+      boolean fixTableLocks, boolean fixTableZnodes, Boolean fixReplication,
       TableName table) throws Exception {
     HBaseFsck fsck = new HBaseFsck(conf, exec);
     fsck.setDisplayFullReport(); // i.e. -details
@@ -60,6 +60,7 @@ public class HbckTestingUtil {
     fsck.setFixTableOrphans(fixTableOrphans);
     fsck.setFixVersionFile(fixVersionFile);
     fsck.setFixReferenceFiles(fixReferenceFiles);
+    fsck.setFixHFileLinks(fixHFileLinks);
     fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
     fsck.setFixTableLocks(fixTableLocks);
     fsck.setFixReplication(fixReplication);
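
With the patch applied, the new check can be requested explicitly via the hbck command-line flag -fixHFileLinks, or implicitly via -repair, as the usage text above shows. A rough sketch of driving it programmatically, mirroring how HbckTestingUtil wires the switches; the Configuration conf, the ExecutorService exec, and the connect()/onlineHbck()/close() call sequence are assumed to be available as in the existing test utility and are not defined by this patch.

    // Assumed context: a Configuration 'conf' pointing at the cluster and an ExecutorService 'exec'.
    HBaseFsck fsck = new HBaseFsck(conf, exec);
    fsck.connect();                 // connect to the cluster before running checks
    fsck.setFixHFileLinks(true);    // the switch added by this patch
    fsck.onlineHbck();              // run the checks (and the requested repairs)
    fsck.close();
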