commit 0a23bab603c7a3dc9a1ca31828521f58ffe7a3cd Author: Todd Lipcon Date: Mon Jan 3 17:58:36 2011 -0800 hbck should pause before re-checking Fix up TestHBaseFsck diff --git src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 665b197..242d8a6 100644 --- src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -69,6 +69,7 @@ import com.google.common.collect.Lists; */ public class HBaseFsck { public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute + public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000; private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName()); private Configuration conf; @@ -918,8 +919,8 @@ public class HBaseFsck { * Fix inconsistencies found by fsck. This should try to fix errors (if any) * found by fsck utility. */ - void setFixErrors() { - fix = true; + void setFixErrors(boolean shouldFix) { + fix = shouldFix; } boolean shouldFix() { @@ -943,6 +944,8 @@ public class HBaseFsck { " have not experienced any metadata updates in the last " + " {{timeInSeconds} seconds."); System.err.println(" -fix Try to fix some of the errors."); + System.err.println(" -sleepBeforeRerun {timeInSeconds} Sleep this many seconds" + + " before checking if the fix worked if run with -fix"); System.err.println(" -summary Print only summary of the tables and status."); Runtime.getRuntime().exit(-2); @@ -959,6 +962,7 @@ public class HBaseFsck { Configuration conf = HBaseConfiguration.create(); conf.set("fs.defaultFS", conf.get("hbase.rootdir")); HBaseFsck fsck = new HBaseFsck(conf); + long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN; // Process command-line args. 
for (int i = 0; i < args.length; i++) { @@ -978,8 +982,20 @@ public class HBaseFsck { printUsageAndExit(); } i++; + } else if (cmd.equals("-sleepBeforeRerun")) { + if (i == args.length - 1) { + System.err.println("HBaseFsck: -sleepBeforeRerun needs a value."); + printUsageAndExit(); + } + try { + sleepBeforeRerun = Long.parseLong(args[i+1]) * 1000L; // option is given in seconds; Thread.sleep() takes ms + } catch (NumberFormatException e) { + System.err.println("-sleepBeforeRerun needs a numeric value."); + printUsageAndExit(); + } + i++; } else if (cmd.equals("-fix")) { - fsck.setFixErrors(); + fsck.setFixErrors(true); } else if (cmd.equals("-summary")) { fsck.setSummary(); } else { @@ -996,6 +1012,14 @@ public class HBaseFsck { // We run it only once more because otherwise we can easily fall into // an infinite loop. if (fsck.shouldRerun()) { + try { + LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix..."); + Thread.sleep(sleepBeforeRerun); + } catch (InterruptedException ie) { + Runtime.getRuntime().exit(code); + } + // Just report + fsck.setFixErrors(false); code = fsck.doWork(); } diff --git src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 35623f7..a055082 100644 --- src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -50,19 +50,24 @@ public class TestHBaseFsck { TEST_UTIL.startMiniCluster(3); } - @Test - public void testHBaseFsck() throws Exception { + private int doFsck(boolean fix) throws Exception { HBaseFsck fsck = new HBaseFsck(conf); fsck.displayFullReport(); fsck.setTimeLag(0); + fsck.setFixErrors(fix); // Most basic check ever, 0 tables - int result = fsck.doWork(); + return fsck.doWork(); + } + + @Test + public void testHBaseFsck() throws Exception { + int result = doFsck(false); assertEquals(0, result); TEST_UTIL.createTable(TABLE, FAM); // We created 1 table, should be fine - result = fsck.doWork(); + result = doFsck(false); 
assertEquals(0, result); // Now let's mess it up and change the assignment in .META. to @@ -92,14 +97,14 @@ public class TestHBaseFsck { } } - // We set this here, but it's really not fixing anything... - fsck.setFixErrors(); - result = fsck.doWork(); - // Fixed or not, it still reports inconsistencies + // Try to fix the data + result = doFsck(true); assertEquals(-1, result); Thread.sleep(15000); - // Disabled, won't work because the region stays unassigned, see HBASE-3217 - // new HTable(conf, TABLE).getScanner(new Scan()); + result = doFsck(false); + // Should have fixed + assertEquals(0, result); + new HTable(conf, TABLE).getScanner(new Scan()); } }