Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (revision 1487421) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (working copy) @@ -390,16 +390,29 @@ InterruptedException { clearState(); - LOG.info("Loading regionsinfo from the .META. table"); - boolean success = loadMetaEntries(); - if (!success) return -1; - + // get regions according to what is online on each RegionServer + loadDeployedRegions(); + // check whether .META. is deployed and online + if (!recordMetaRegion()) { + // Will remove later if we can fix it + errors.reportError("Fatal error: unable to get .META. region location. Exiting..."); + return -2; + } // Check if .META. is found only once and in the right place if (!checkMetaRegion()) { - // Will remove later if we can fix it - errors.reportError("Encountered fatal error. Exiting..."); + String errorMsg = ".META. table is not consistent. "; + if (shouldFixAssignments()) { + errorMsg += "HBCK will try fixing it. Rerun once .META. is back to consistent state."; + } else { + errorMsg += "Run HBCK with proper fix options to fix .META. inconsistency."; + } + errors.reportError(errorMsg + " Exiting..."); return -2; } + // Do not proceed with further table consistency checks when META itself is not consistent. + LOG.info("Loading regionsinfo from the .META. table"); + boolean success = loadMetaEntries(); + if (!success) return -1; // Empty cells in .META.? 
reportEmptyMetaCells(); @@ -414,9 +427,6 @@ reportTablesInFlux(); } - // get regions according to what is online on each RegionServer - loadDeployedRegions(); - // load regiondirs and regioninfos from HDFS if (shouldCheckHdfs()) { loadHdfsRegionDirs(); @@ -1335,10 +1345,13 @@ } catch (KeeperException e) { throw new IOException(e); } - MetaEntry m = - new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis()); - HbckInfo hbInfo = new HbckInfo(m); - regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), hbInfo); + MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis()); + HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName()); + if (hbckInfo == null) { + regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m)); + } else { + hbckInfo.metaEntry = m; + } return true; } @@ -2493,45 +2506,36 @@ * @throws KeeperException * @throws InterruptedException */ - boolean checkMetaRegion() - throws IOException, KeeperException, InterruptedException { - List metaRegions = Lists.newArrayList(); + boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException { + List metaRegions = Lists.newArrayList(); for (HbckInfo value : regionInfoMap.values()) { - if (value.metaEntry.isMetaRegion()) { + if (value.metaEntry != null && value.metaEntry.isMetaRegion()) { metaRegions.add(value); } } - // If something is wrong - if (metaRegions.size() != 1) { - HRegionLocation rootLocation = connection.locateRegion( - HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW); - HbckInfo root = - regionInfoMap.get(rootLocation.getRegionInfo().getEncodedName()); - - // If there is no region holding .META. - if (metaRegions.size() == 0) { + // There will always be one entry in regionInfoMap corresponding to .META. + // Check the deployed servers. It should be exactly one server. 
+ HbckInfo metaHbckInfo = metaRegions.get(0); + List servers = metaHbckInfo.deployedOn; + if (servers.size() != 1) { + if (servers.size() == 0) { errors.reportError(ERROR_CODE.NO_META_REGION, ".META. is not found on any region."); if (shouldFixAssignments()) { errors.print("Trying to fix a problem with .META..."); setShouldRerun(); // try to fix it (treat it as unassigned region) - HBaseFsckRepair.fixUnassigned(admin, root.metaEntry); - HBaseFsckRepair.waitUntilAssigned(admin, root.getHdfsHRI()); + HBaseFsckRepair.fixUnassigned(admin, metaHbckInfo.metaEntry); + HBaseFsckRepair.waitUntilAssigned(admin, metaHbckInfo.metaEntry); } - } - // If there are more than one regions pretending to hold the .META. - else if (metaRegions.size() > 1) { - errors.reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region."); + } else if (servers.size() > 1) { + errors + .reportError(ERROR_CODE.MULTI_META_REGION, ".META. is found on more than one region."); if (shouldFixAssignments()) { errors.print("Trying to fix a problem with .META..."); setShouldRerun(); // try fix it (treat is a dupe assignment) - List deployedOn = Lists.newArrayList(); - for (HbckInfo mRegion : metaRegions) { - deployedOn.add(mRegion.metaEntry.regionServer); - } - HBaseFsckRepair.fixMultiAssignment(admin, root.metaEntry, deployedOn); + HBaseFsckRepair.fixMultiAssignment(admin, metaHbckInfo.metaEntry, servers); } } // rerun hbck with hopefully fixed META @@ -2546,15 +2550,6 @@ * @throws IOException if an error is encountered */ boolean loadMetaEntries() throws IOException { - - // get a list of all regions from the master. This involves - // scanning the META table - if (!recordMetaRegion()) { - // Will remove later if we can fix it - errors.reportError("Fatal error: unable to get root region location. 
Exiting..."); - return false; - } - MetaScannerVisitor visitor = new MetaScannerVisitorBase() { int countRecord = 1; @@ -2588,9 +2583,12 @@ } PairOfSameType daughters = HRegionInfo.getDaughterRegions(result); MetaEntry m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond()); - HbckInfo hbInfo = new HbckInfo(m); - HbckInfo previous = regionInfoMap.put(hri.getEncodedName(), hbInfo); - if (previous != null) { + HbckInfo previous = regionInfoMap.get(hri.getEncodedName()); + if (previous == null) { + regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m)); + } else if (previous.metaEntry == null) { + previous.metaEntry = m; + } else { throw new IOException("Two entries in META are same " + previous); } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (revision 1487421) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (working copy) @@ -60,15 +60,16 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.io.hfile.TestHFile; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; @@ -232,7 +233,9 @@ HRegionInfo hri) throws IOException, InterruptedException 
{ try { HBaseFsckRepair.closeRegionSilentlyAndWait(admin, sn, hri); - admin.offline(hri.getRegionName()); + if (!hri.isMetaTable()) { + admin.offline(hri.getRegionName()); + } } catch (IOException ioe) { LOG.warn("Got exception when attempting to offline region " + Bytes.toString(hri.getRegionName()), ioe); @@ -2003,6 +2006,57 @@ writeLock.release(); // release for clean state } + @Test + public void testMetaOffline() throws Exception { + // check no errors + HBaseFsck hbck = doFsck(conf, false); + assertNoErrors(hbck); + deleteMetaRegion(conf, true, false, false); + hbck = doFsck(conf, false); + // ERROR_CODE.UNKNOWN is coming because we reportError with a message for the .META. + // inconsistency and whether we will be fixing it or not. + assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN }); + hbck = doFsck(conf, true); + assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN }); + hbck = doFsck(conf, false); + assertNoErrors(hbck); + } + + private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs, + boolean regionInfoOnly) throws IOException, InterruptedException { + HConnection connection = HConnectionManager.getConnection(conf); + HRegionLocation metaLocation = connection.locateRegion(HConstants.META_TABLE_NAME, + HConstants.EMPTY_START_ROW); + ServerName hsa = new ServerName(metaLocation.getHostnamePort(), 0L); + HRegionInfo hri = metaLocation.getRegionInfo(); + if (unassign) { + LOG.info("Undeploying meta region " + hri + " from server " + hsa); + undeployRegion(new HBaseAdmin(conf), hsa, hri); + } + + if (regionInfoOnly) { + LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString()); + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = rootDir.getFileSystem(conf); + Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(), + hri.getEncodedName()); + Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE); + 
fs.delete(hriPath, true); + } + + if (hdfs) { + LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString()); + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = rootDir.getFileSystem(conf); + Path p = new Path(rootDir + "/" + HTableDescriptor.META_TABLEDESC.getNameAsString(), + hri.getEncodedName()); + HBaseFsck.debugLsr(conf, p); + boolean success = fs.delete(p, true); + LOG.info("Deleted " + p + " sucessfully? " + success); + HBaseFsck.debugLsr(conf, p); + } + } + @org.junit.Rule public TestName name = new TestName(); }