commit 679d8e16a2665254dff53961e4f0c5211f506328 Author: Todd Lipcon Date: Thu Dec 30 18:14:25 2010 -0800 hbck to fix case where region is on fs but not in META TODO: should double-check that there is in fact a hole in META at this range! hbck fix continued diff --git src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index dfb7043..387c1b4 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -47,6 +47,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -422,6 +423,26 @@ public class HRegion implements HeapSize { // , Writable{ } } + /** + * Read the region info from the filesystem in the given directory. 
+ */ + public static HRegionInfo readRegionInfo(Configuration conf, Path regionDir) + throws IOException { + FileSystem fs = regionDir.getFileSystem(conf); + Path hriFile = new Path(regionDir, HRegion.REGIONINFO_FILE); + if (!fs.exists(hriFile)) { + throw new IOException("Unable to repair, no regioninfo file at " + hriFile); + } + FSDataInputStream fsi = fs.open(hriFile); + try { + HRegionInfo hri = new HRegionInfo(); + hri.readFields(fsi); + return hri; + } finally { + fsi.close(); + } + } + /** @return a HRegionInfo object for this region */ public HRegionInfo getRegionInfo() { return this.regionInfo; diff --git src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 8b64738..242d8a6 100644 --- src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HServerAddress; @@ -130,7 +131,7 @@ public class HBaseFsck { errors.reportError("Encountered fatal error. Exitting..."); return -1; } - getMetaEntries(); + scanMeta(); // Check if .META. 
is found only once and on the right place if (!checkMetaEntries()) { @@ -418,8 +419,22 @@ public class HBaseFsck { errors.reportError("Region " + descriptiveName + ", key=" + key + ", not on HDFS or in META but " + "deployed on " + Joiner.on(", ").join(hbi.deployedOn)); } else if (!inMeta && inHdfs && !isDeployed) { - errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " + - "or deployed on any region server."); + StringBuilder err = new StringBuilder(); + err.append("Region " + descriptiveName + " on HDFS, but not listed in META " + + "or deployed on any region server.\n"); + try { + HRegionInfo hri = HRegion.readRegionInfo(conf, hbi.foundRegionDir.getPath()); + err.append(" Region info: " + hri); + } catch (IOException ioe) { + err.append(" Also unable to read region info off filesystem: " + ioe.getMessage()); + } + errors.reportError(err.toString()); + + if (shouldFix()) { + errors.print("Trying to fix region missing from META..."); + setShouldRerun(); + HBaseFsckRepair.fixMissingFromMeta(this.conf, hbi.foundRegionDir.getPath()); + } } else if (!inMeta && inHdfs && isDeployed) { errors.reportError("Region " + descriptiveName + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn)); @@ -433,6 +448,7 @@ public class HBaseFsck { "and deployed on " + Joiner.on(", ").join(hbi.deployedOn)); } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) { errors.reportError("Region " + descriptiveName + " not deployed on any region server."); + // If we are trying to fix the errors if (shouldFix()) { errors.print("Trying to fix unassigned region..."); @@ -546,14 +562,14 @@ public class HBaseFsck { while (true) { // Check if chain is broken if (!edges.containsKey(last)) { - errors.detail("Chain of regions in table " + tableName + + errors.reportError("Chain of regions in table " + tableName + " is broken; edges does not contain " + Bytes.toString(last)); return false; } next = edges.get(last); // Found a cycle if 
(visited.contains(next)) { - errors.detail("Chain of regions in table " + tableName + + errors.reportError("Chain of regions in table " + tableName + " has a cycle around " + Bytes.toString(next)); return false; } @@ -563,7 +579,7 @@ public class HBaseFsck { if (next.length == 0) { // If we have visited all elements we are fine if (edges.size() != visited.size()) { - errors.detail("Chain of regions in table " + tableName + + errors.reportError("Chain of regions in table " + tableName + " contains less elements than are listed in META; visited=" + visited.size() + ", edges=" + edges.size()); return false; @@ -677,10 +693,14 @@ public class HBaseFsck { } /** - * Scan .META. and -ROOT-, adding all regions found to the regionInfo map. + * Scan .META. and -ROOT-. + * * @throws IOException if an error is encountered */ - void getMetaEntries() throws IOException { + void scanMeta() throws IOException { MetaScannerVisitor visitor = new MetaScannerVisitor() { int countRecord = 1; diff --git src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java index b624d28..2662c11 100644 --- src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java +++ src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java @@ -23,13 +23,18 @@ import java.io.IOException; import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.ipc.HRegionInterface; +import 
org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.zookeeper.KeeperException; @@ -78,6 +83,30 @@ public class HBaseFsckRepair { forceOfflineInZK(conf, actualRegion); } + /** + * Fix the case where a region is on the filesystem but not in META. + * This is done by reading the regioninfo off the filesystem and inserting + * it into META. NOTE: this method does not itself create an offline node in ZK. + */ + public static void fixMissingFromMeta(Configuration conf, Path regionDir) + throws IOException, KeeperException { + HRegionInfo hri = HRegion.readRegionInfo(conf, regionDir); + + HConnection conn = HConnectionManager.getConnection(conf); + CatalogTracker tracker = new CatalogTracker(conn); + try { + tracker.start(); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new IOException(ie); + } + try { + MetaEditor.updateRegionInfo(tracker, hri); + } finally { + tracker.stop(); + } + } + private static void forceOfflineInZK(Configuration conf, HRegionInfo region) throws ZooKeeperConnectionException, KeeperException, IOException { ZKAssign.createOrForceNodeOffline( @@ -105,4 +134,4 @@ public class HBaseFsckRepair { throw new IOException("Region " + region + " failed to close within" + " timeout " + timeout); } -} \ No newline at end of file +}