diff --git a/hbase-branch-1.4/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-branch-1.4/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java old mode 100755 new mode 100644 index d0fa17d..beec0c3 --- a/hbase-branch-1.4/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-branch-1.4/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -241,6 +241,7 @@ public class HBaseFsck extends Configured implements Closeable { /*********** * Options ***********/ + private static boolean offline = false; // true when the cluster is offline (online checks are skipped) private static boolean details = false; // do we display the full report private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older private static boolean forceExclusive = false; // only this hbck can modify HBase @@ -527,12 +528,15 @@ public class HBaseFsck extends Configured implements Closeable { } }); - LOG.info("Launching hbck"); - connection = (ClusterConnection)ConnectionFactory.createConnection(getConf()); - admin = connection.getAdmin(); - meta = connection.getTable(TableName.META_TABLE_NAME); - status = admin.getClusterStatus(); + if (!offline) { + + LOG.info("Launching hbck"); + connection = (ClusterConnection) ConnectionFactory.createConnection(getConf()); + admin = connection.getAdmin(); + meta = connection.getTable(TableName.META_TABLE_NAME); + status = admin.getClusterStatus(); + } } /** @@ -610,7 +614,7 @@ public class HBaseFsck extends Configured implements Closeable { public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException { // Initial pass to fix orphans. if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles() - || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) { + || shouldFixHdfsOverlaps() || shouldFixTableOrphans() || shouldFixVersionFile())) { LOG.info("Loading regioninfos HDFS"); // if nothing is happening this should always complete in two iterations. 
int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3); @@ -758,17 +762,41 @@ public class HBaseFsck extends Configured implements Closeable { * Contacts the master and prints out cluster-wide information * @return 0 on success, non-zero on failure */ - public int onlineHbck() + public int doHbck() throws IOException, KeeperException, InterruptedException, ServiceException { - // print hbase server version - errors.print("Version: " + status.getHBaseVersion()); // Clean start clearState(); + + // first, we should do offline repair. + offlineHbck(); + + // do not do it if the cluster is offline + if (!offline) { + onlineHbck(); + } + + // Remove the hbck lock + unlockHbck(); + + // Print table summary + printTableSummary(tablesInfo); + return errors.summarize(); + } + + private void offlineHbck() throws IOException, InterruptedException, KeeperException { // Do offline check and repair first offlineHdfsIntegrityRepair(); offlineReferenceFileRepair(); offlineHLinkFileRepair(); + checkAndFixTableLocks(); + // Check (and fix if requested) orphaned table ZNodes + checkAndFixOrphanedTableZNodes(); + } + + private void onlineHbck() throws IOException, KeeperException, InterruptedException { + // print hbase server version + errors.print("Version: " + status.getHBaseVersion()); // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online // hbck, it is likely that hbck would be misled and report transient errors. Therefore, it // is better to set Master into maintenance mode during online hbck. 
@@ -784,22 +812,10 @@ public class HBaseFsck extends Configured implements Closeable { checkRegionBoundaries(); } - checkAndFixTableLocks(); - - // Check (and fix if requested) orphaned table ZNodes - checkAndFixOrphanedTableZNodes(); - checkAndFixReplication(); // Remove the hbck znode cleanupHbckZnode(); - - // Remove the hbck lock - unlockHbck(); - - // Print table summary - printTableSummary(tablesInfo); - return errors.summarize(); } public static byte[] keyOnly (byte[] b) { @@ -4497,6 +4513,13 @@ public class HBaseFsck extends Configured implements Closeable { }; /** + * Marks the cluster as offline, so the online checks are skipped. + */ + public static void setOffline() { + offline = true; + } + + /** * Display the full report from fsck. This displays all live and dead region * servers, and all known regions. */ @@ -4795,6 +4818,7 @@ public class HBaseFsck extends Configured implements Closeable { out.println("Usage: fsck [opts] {only tables}"); out.println(" where [opts] are:"); out.println(" -help Display help options (this)"); + out.println(" -offline Treat the hbase cluster as offline and skip the online checks."); out.println(" -details Display full report of all regions."); out.println(" -timelag Process only regions that " + " have not experienced any metadata updates in the last " + @@ -4905,6 +4929,8 @@ public class HBaseFsck extends Configured implements Closeable { String cmd = args[i]; if (cmd.equals("-help") || cmd.equals("-h")) { return printUsageAndExit(); + } else if (cmd.equals("-offline")) { + setOffline(); } else if (cmd.equals("-details")) { setDisplayFullReport(); } else if (cmd.equals("-exclusive")) { @@ -5092,7 +5118,7 @@ public class HBaseFsck extends Configured implements Closeable { } // check and fix table integrity, region consistency. - int code = onlineHbck(); + int code = doHbck(); setRetCode(code); // If we have changed the HBase state it is better to run hbck again // to see if we haven't broken something else in the process. 
@@ -5114,7 +5140,7 @@ public class HBaseFsck extends Configured implements Closeable { setFixVersionFile(false); setFixTableOrphans(false); errors.resetErrors(); - code = onlineHbck(); + code = doHbck(); setRetCode(code); } } finally { diff --git a/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java old mode 100755 new mode 100644 index 917d7d3..e1fffd4 --- a/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -372,7 +372,7 @@ public class TestHBaseFsck { hbck.setFixReferenceFiles(true); hbck.setFixHFileLinks(true); hbck.connect(); - hbck.onlineHbck(); + hbck.doHbck(); hbck.close(); hbck = doFsck(conf, false); @@ -441,7 +441,7 @@ public class TestHBaseFsck { hbck.setFixReferenceFiles(true); hbck.setFixHFileLinks(true); hbck.connect(); - hbck.onlineHbck(); + hbck.doHbck(); hbck.close(); hbck = doFsck(conf, false); @@ -1258,7 +1258,7 @@ public class TestHBaseFsck { fsck.setFixVersionFile(true); fsck.setSidelineBigOverlaps(true); fsck.setMaxMerge(2); - fsck.onlineHbck(); + fsck.doHbck(); fsck.close(); // verify that overlaps are fixed, and there are less rows @@ -1883,7 +1883,7 @@ public class TestHBaseFsck { hbck.setDisplayFullReport(); // i.e. -details hbck.setTimeLag(0); hbck.setFixSplitParents(true); - hbck.onlineHbck(); + hbck.doHbck(); assertTrue(hbck.shouldRerun()); hbck.close(); @@ -2155,7 +2155,7 @@ public class TestHBaseFsck { fsck.setDisplayFullReport(); // i.e. 
-details fsck.setTimeLag(0); fsck.setCheckHdfs(false); - fsck.onlineHbck(); + fsck.doHbck(); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN}); fsck.close(); @@ -2167,9 +2167,9 @@ public class TestHBaseFsck { fsck.setTimeLag(0); fsck.setCheckHdfs(false); fsck.setFixAssignments(true); - fsck.onlineHbck(); + fsck.doHbck(); assertTrue(fsck.shouldRerun()); - fsck.onlineHbck(); + fsck.doHbck(); assertNoErrors(fsck); assertEquals(ROWKEYS.length, countRows()); @@ -2208,7 +2208,7 @@ public class TestHBaseFsck { fsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setCheckHdfs(false); - fsck.onlineHbck(); + fsck.doHbck(); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN }); fsck.close(); @@ -2221,7 +2221,7 @@ public class TestHBaseFsck { fsck.setCheckHdfs(false); fsck.setFixAssignments(true); fsck.setFixMeta(true); - fsck.onlineHbck(); + fsck.doHbck(); assertFalse(fsck.shouldRerun()); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN }); @@ -2272,7 +2272,7 @@ public class TestHBaseFsck { fsck.setDisplayFullReport(); // i.e. 
-details fsck.setTimeLag(0); fsck.setCheckHdfs(false); - fsck.onlineHbck(); + fsck.doHbck(); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN}); fsck.close(); @@ -2286,7 +2286,7 @@ public class TestHBaseFsck { fsck.setFixHdfsHoles(true); fsck.setFixHdfsOverlaps(true); fsck.setFixHdfsOrphans(true); - fsck.onlineHbck(); + fsck.doHbck(); assertFalse(fsck.shouldRerun()); assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN}); fsck.close(); diff --git a/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java b/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java index 37f900f..8a1105a 100755 --- a/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java +++ b/hbase-branch-1.4/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java @@ -69,7 +69,7 @@ public class HbckTestingUtil { if (table != null) { fsck.includeTable(table); } - fsck.onlineHbck(); + fsck.doHbck(); fsck.close(); return fsck; }