diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java index ec21dce..d3d6cdf 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -72,6 +72,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.security.AccessDeniedException; @@ -1481,6 +1482,32 @@ public abstract class FSUtils { } } + /** + * Filter for HFileLinks (StoreFiles and HFiles not included). + */ + public static class HFileLinkFilter extends AbstractFileStatusFilter { + final FileSystem fs; + + public HFileLinkFilter(FileSystem fs) { + this.fs = fs; + } + + @Override + protected boolean accept(Path p, @CheckForNull Boolean isDir) { + if (!HFileLink.isHFileLink(p)) { + return false; + } + + try { + return isFile(fs, isDir, p); + } catch (IOException ioe) { + // Maybe the file was moved or the fs was disconnected. 
+ LOG.warn("Skipping file " + p + " due to IOException", ioe); + return false; + } + } + } + public static class ReferenceFileFilter extends AbstractFileStatusFilter { private final FileSystem fs; diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index f116c2e..0e2b925 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -110,6 +110,8 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableState; +import org.apache.hadoop.hbase.io.FileLink; +import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.master.MasterFileSystem; @@ -248,6 +250,7 @@ public class HBaseFsck extends Configured implements Closeable { private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs private boolean fixSplitParents = false; // fix lingering split parents private boolean fixReferenceFiles = false; // fix lingering reference store file + private boolean fixHFileLinkFiles = false; // fix lingering HFileLink files private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows private boolean fixTableLocks = false; // fix table locks which are expired private boolean fixReplication = false; // fix undeleted replication queues for removed peer @@ -753,6 +756,7 @@ public class HBaseFsck extends Configured implements Closeable { // Do offline check and repair first offlineHdfsIntegrityRepair(); offlineReferenceFileRepair(); + offlineHLinkFileRepair(); // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online // hbck, it is likely that hbck would be misled 
and report transient errors. Therefore, it // is better to set Master into maintenance mode during online hbck. @@ -1116,6 +1120,69 @@ public class HBaseFsck extends Configured implements Closeable { } /** + * Scan all the store file names to find any lingering HFileLink files, + * which refer to some non-existent files. If "fix" option is enabled, + * any lingering HFileLink file will be sidelined if found. + */ + private void offlineHLinkFileRepair() throws IOException, InterruptedException { + Configuration conf = getConf(); + Path hbaseRoot = FSUtils.getRootDir(conf); + FileSystem fs = hbaseRoot.getFileSystem(conf); + LOG.info("Computing mapping of all link files"); + Map<String, Path> allFiles = FSUtils + .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(fs), executor, errors); + errors.print(""); + + LOG.info("Validating mapping using HDFS state"); + for (Path path : allFiles.values()) { + // building HFileLink object to gather locations + HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path); + if (actualLink.exists(fs)) continue; // good, expected + + // Found a lingering HFileLink file + errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink file " + path); + if (!shouldFixHFileLinkFiles()) continue; + + // Now, trying to fix it since requested + setShouldRerun(); + + // An HFileLink path should be like + // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename + // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure. 
+ boolean success = sidelineFile(fs, hbaseRoot, path); + + if (!success) { + LOG.error("Failed to sideline HFileLink file " + path); + } + + // An HFileLink backreference path should be like + // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename + // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure. + Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil + .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()), + HFileLink.getReferencedRegionName(path.getName().toString()), + path.getParent().getName()), + HFileLink.getReferencedHFileName(path.getName().toString())); + success = sidelineFile(fs, hbaseRoot, backRefPath); + + if (!success) { + LOG.error("Failed to sideline HFileLink backreference file " + path); + } + } + } + + private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException { + URI uri = hbaseRoot.toUri().relativize(path.toUri()); + if (uri.isAbsolute()) return false; + String directoryStructure = hbaseRoot.toUri().relativize(path.toUri()).getPath(); + Path rootDir = getSidelineDir(); + Path dst = new Path(rootDir, directoryStructure); + fs.mkdirs(dst.getParent()); + LOG.info("Trying to sideline file " + path + " to " + dst); + return fs.rename(path, dst); + } + + /** + * TODO -- need to add tests for this. 
*/ private void reportEmptyMetaCells() { @@ -3869,8 +3936,8 @@ public class HBaseFsck extends Configured implements Closeable { FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE, - WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE, - NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE + LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, + ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE } void clear(); void report(String message); @@ -4435,6 +4502,15 @@ public class HBaseFsck extends Configured implements Closeable { return fixReferenceFiles; } + public void setFixHFileLinkFiles(boolean shouldFix) { + fixHFileLinkFiles = shouldFix; + fixAny |= shouldFix; + } + + boolean shouldFixHFileLinkFiles() { + return fixHFileLinkFiles; + } + public boolean shouldIgnorePreCheckPermission() { return !fixAny || ignorePreCheckPermission; } @@ -4551,6 +4627,7 @@ public class HBaseFsck extends Configured implements Closeable { out.println(" -fixSplitParents Try to force offline split parents to be online."); out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check"); out.println(" -fixReferenceFiles Try to offline lingering reference store files"); + out.println(" -fixHFileLinkFiles Try to offline lingering HFileLink files"); out.println(" -fixEmptyMetaCells Try to fix hbase:meta entries not referencing any region" + " (empty REGIONINFO_QUALIFIER rows)"); @@ -4562,7 +4639,8 @@ public class HBaseFsck extends Configured implements Closeable { out.println(""); out.println(" Metadata Repair shortcuts"); out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " + - "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks"); + 
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles" + + " -fixHFileLinkFiles -fixTableLocks"); out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles"); out.println(""); @@ -4692,6 +4770,8 @@ public class HBaseFsck extends Configured implements Closeable { sidelineCorruptHFiles = true; } else if (cmd.equals("-fixReferenceFiles")) { setFixReferenceFiles(true); + } else if (cmd.equals("-fixHFileLinkFiles")) { + setFixHFileLinkFiles(true); } else if (cmd.equals("-fixEmptyMetaCells")) { setFixEmptyMetaCells(true); } else if (cmd.equals("-repair")) { @@ -4707,6 +4787,7 @@ public class HBaseFsck extends Configured implements Closeable { setFixSplitParents(false); setCheckHdfs(true); setFixReferenceFiles(true); + setFixHFileLinkFiles(true); setFixTableLocks(true); } else if (cmd.equals("-repairHoles")) { // this will make all missing hdfs regions available but may lose data diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java index cacfca2..219c688 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java @@ -44,6 +44,10 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; +import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.HRegionServer; @@ -279,6 +283,56 @@ public class TestHBaseFsckTwoRS extends BaseTestHBaseFsck { } } + /** + * Test fixing lingering HFileLinks. 
@@ -979,7 +979,7 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck { // now fix it. The fix should not revert the region split, but add daughters to META hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, - false, false, null); + false, false, false, null); assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, @@ -1692,7 +1692,7 @@ public class TestHBaseFsckOneRS extends BaseTestHBaseFsck { // fix hole assertErrors( doFsck(conf, false, true, false, false, false, false, false, false, false, false, false, - null), + false, null), new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED }); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java index cacfca2..219c688 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckTwoRS.java @@ -44,6 +44,10 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; +import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.HRegionServer; @@ -279,6 +283,56 @@ public class TestHBaseFsckTwoRS extends BaseTestHBaseFsck { } } + /** + * Test fixing lingering HFileLinks. 
+ */ + @Test(timeout = 180000) public void testLingeringHFileLinks() throws Exception { + TableName table = TableName.valueOf("testLingeringHFileLinks"); + try { + setupTable(table); + + FileSystem fs = FileSystem.get(conf); + Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table); + Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0); + String regionName = regionDir.getName(); + Path famDir = new Path(regionDir, FAM_STR); + String HFILE_NAME = "01234567abcd"; + Path hFilePath = new Path(famDir, HFILE_NAME); + + // creating HFile + HFileContext context = new HFileContextBuilder().withIncludesTags(false).build(); + HFile.Writer w = + HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context) + .create(); + w.close(); + + HFileLink.create(conf, fs, famDir, table, regionName, HFILE_NAME); + + // should report no error + HBaseFsck hbck = doFsck(conf, false); + assertNoErrors(hbck); + + // Delete linked file + fs.delete(hFilePath, true); + + // Check without fix should show the error + hbck = doFsck(conf, false); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK }); + + // Fixing the error + hbck = doFsck(conf, true); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK }); + + // Fix should sideline these files, thus preventing the error + hbck = doFsck(conf, false); + assertNoErrors(hbck); + } finally { + cleanupTable(table); + } + } + @Test (timeout=180000) public void testMetaOffline() throws Exception { // check no errors diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java index d1e774e..96a5b96 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java +++ 
hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java @@ -40,12 +40,12 @@ public class HbckTestingUtil { public static HBaseFsck doFsck( Configuration conf, boolean fix, TableName table) throws Exception { - return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table); + return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table); } public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixHdfsOrphans, - boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, + boolean fixTableOrphans, boolean fixVersionFile, boolean fixReferenceFiles, boolean fixHFileLinkFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks, Boolean fixReplication, TableName table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); @@ -60,6 +60,7 @@ public class HbckTestingUtil { fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); fsck.setFixReferenceFiles(fixReferenceFiles); + fsck.setFixHFileLinkFiles(fixHFileLinkFiles); fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo); fsck.setFixTableLocks(fixTableLocks); fsck.setFixReplication(fixReplication);