Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -1509,7 +1509,7 @@ getOutboundMsgs().add(new HMsg(HMsg.Type.MSG_REPORT_PROCESS_OPEN, hri)); } - protected void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted) + public void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted) throws IOException { RSZookeeperUpdater zkUpdater = null; if(reportWhenCompleted) { @@ -2549,4 +2549,4 @@ public int getNumberOfOnlineRegions() { return onlineRegions.size(); } -} \ No newline at end of file +} Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -1401,4 +1401,10 @@ public static void main(String [] args) { doMain(args, HMaster.class); } + + @Override + public void clearFromTransition(HRegionInfo region) { + this.regionManager.clearFromInTransition(region.getRegionName()); + LOG.info("Cleared region " + region + " from transition map"); + } } Index: src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (working copy) @@ -76,6 +76,7 @@ *
  • Version 22: HBASE-2209. Added List support to RPC
  • *
  • Version 23: HBASE-2066, multi-put.
  • *
  • Version 24: HBASE-2473, create table with regions.
  • + *
 • Version 24 (unchanged): HBASE-2819, HBCK changes to master and RS — shares versionID 24 with HBASE-2473; interface additions only, no wire-incompatible change, so versionID is not bumped
  • * */ public static final long versionID = 24L; Index: src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (working copy) @@ -22,6 +22,7 @@ import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.io.Writable; @@ -128,4 +129,10 @@ * @return status object */ public ClusterStatus getClusterStatus(); + + /** + * Clears the specified region from being in transition. Used by HBaseFsck. + * @param region region to clear from transition map + */ + public void clearFromTransition(HRegionInfo region); } Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (working copy) @@ -298,4 +298,12 @@ */ public void replicateLogEntries(HLog.Entry[] entries) throws IOException; + /** + * Closes the specified region. 
+ * @param hri region to be closed + * @param reportWhenCompleted whether to report to master + * @throws IOException + */ + public void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted) + throws IOException; } Index: src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java (working copy) @@ -86,6 +86,28 @@ MetaScannerVisitor visitor, byte[] tableName, byte[] row, int rowLimit) throws IOException { + metaScan(configuration, visitor, HConstants.META_TABLE_NAME, tableName, row, rowLimit); + } + + /** + * Scans the meta table and calls a visitor on each RowResult. Uses a table + * name and a row name to locate meta regions. And it only scans at most + * rowLimit of rows. + * + * @param configuration HBase configuration. + * @param visitor Visitor object. + * @param metaTableName Meta table name (usually .META.) + * @param tableName User table name. + * @param row Name of the row at the user table. The scan will start from + * the region row where the row resides. + * @param rowLimit Max of processed rows. If it is less than 0, it + * will be set to default value Integer.MAX_VALUE. + * @throws IOException e + */ + public static void metaScan(Configuration configuration, MetaScannerVisitor visitor, + byte[] metaTableName, byte[] tableName, byte[] row, + int rowLimit) + throws IOException { int rowUpperLimit = rowLimit > 0 ? 
rowLimit: Integer.MAX_VALUE; HConnection connection = HConnectionManager.getConnection(configuration); @@ -99,7 +121,7 @@ HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false); - HTable metaTable = new HTable(configuration, HConstants.META_TABLE_NAME); + HTable metaTable = new HTable(configuration, metaTableName); Result startRowResult = metaTable.getRowOrBefore(searchRow, HConstants.CATALOG_FAMILY); if (startRowResult == null) { @@ -132,7 +154,7 @@ configuration.getInt("hbase.meta.scanner.caching", 100)); do { final Scan scan = new Scan(startRow).addFamily(HConstants.CATALOG_FAMILY); - callable = new ScannerCallable(connection, HConstants.META_TABLE_NAME, + callable = new ScannerCallable(connection, metaTableName, scan); // Open scanner connection.getRegionServerWithRetries(callable); Index: src/main/java/org/apache/hadoop/hbase/client/HBaseFsck.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HBaseFsck.java (revision 999137) +++ src/main/java/org/apache/hadoop/hbase/client/HBaseFsck.java (working copy) @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.List; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.atomic.AtomicInteger; @@ -37,36 +38,42 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; -import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface; import 
org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; + /** * Check consistency among the in-memory states of the master and the * region server(s) and the state of data in HDFS. */ -public class HBaseFsck extends HBaseAdmin { +public class HBaseFsck { public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName()); private Configuration conf; - private FileSystem fs; - private Path rootDir; private ClusterStatus status; - private HMasterInterface master; private HConnection connection; - private TreeMap metaEntries; + private TreeMap regionInfo = new TreeMap(); + private TreeMap tablesInfo = new TreeMap(); + ErrorReporter errors = new PrintingErrorReporter(); - private boolean details = false; // do we display the full report? + private static boolean details = false; // do we display the full report private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older + private boolean fix = false; // do we want to try fixing the errors? 
+ private boolean rerun = false; // if we tried to fix something rerun hbck + private static boolean summary = false; // if we want to print less output /** * Constructor @@ -76,19 +83,12 @@ */ public HBaseFsck(Configuration conf) throws MasterNotRunningException, IOException { - super(conf); this.conf = conf; - // setup filesystem properties - this.rootDir = new Path(conf.get(HConstants.HBASE_DIR)); - this.fs = rootDir.getFileSystem(conf); - - // fetch information from master - master = getMaster(); - status = master.getClusterStatus(); - connection = getConnection(); - this.metaEntries = new TreeMap(); + HBaseAdmin admin = new HBaseAdmin(conf); + status = admin.getMaster().getClusterStatus(); + connection = admin.getConnection(); } /** @@ -97,24 +97,41 @@ * @return 0 on success, non-zero on failure */ int doWork() throws IOException { + // print hbase server version - System.out.println("Version: " + status.getHBaseVersion()); + errors.print("Version: " + status.getHBaseVersion()); + // Make sure regionInfo is empty before starting + regionInfo.clear(); + tablesInfo.clear(); + // get a list of all regions from the master. This involves // scanning the META table - getMetaEntries(metaEntries); + if (!recordRootRegion()) { + // Will remove later if we can fix it + errors.reportError("Encountered fatal error. Exitting..."); + return -1; + } + getMetaEntries(); + // Check if .META. is found only once and on the right place + if (!checkMetaEntries()) { + // Will remove later if we can fix it + errors.reportError("Encountered fatal error. Exitting..."); + return -1; + } + // get a list of all tables that have not changed recently. 
AtomicInteger numSkipped = new AtomicInteger(0); - HTableDescriptor[] allTables = getTables(metaEntries, numSkipped); - System.out.println("Number of Tables: " + allTables.length); + HTableDescriptor[] allTables = getTables(numSkipped); + errors.print("Number of Tables: " + allTables.length); if (details) { if (numSkipped.get() > 0) { - System.out.println("\n Number of Tables in flux: " + numSkipped.get()); + errors.detail("\n Number of Tables in flux: " + numSkipped.get()); } for (HTableDescriptor td : allTables) { String tableName = td.getNameAsString(); - System.out.println("\t Table: " + tableName + "\t" + + errors.detail("\t Table: " + tableName + "\t" + (td.isReadOnly() ? "ro" : "rw") + "\t" + (td.isRootRegion() ? "ROOT" : (td.isMetaRegion() ? "META" : " ")) + "\t" + @@ -124,221 +141,375 @@ // From the master, get a list of all known live region servers Collection regionServers = status.getServerInfo(); - System.out.println("Number of live region servers:" + + errors.print("Number of live region servers:" + regionServers.size()); if (details) { for (HServerInfo rsinfo: regionServers) { - System.out.println("\t RegionServer:" + rsinfo.getServerName()); + errors.detail("\t RegionServer:" + rsinfo.getServerName()); } } // From the master, get a list of all dead region servers Collection deadRegionServers = status.getDeadServerNames(); - System.out.println("Number of dead region servers:" + + errors.print("Number of dead region servers:" + deadRegionServers.size()); if (details) { for (String name: deadRegionServers) { - System.out.println("\t RegionServer(dead):" + name); + errors.detail("\t RegionServer(dead):" + name); } } - // process information from all region servers - boolean status1 = processRegionServers(regionServers); + // Determine what's deployed + processRegionServers(regionServers); - // match HDFS with META - boolean status2 = checkHdfs(); + // Determine what's on HDFS + checkHdfs(); - if (status1 == true && status2 == true) { - 
System.out.println("\nRest easy, buddy! HBase is clean. "); - return 0; - } else { - System.out.println("\nInconsistencies detected."); - return -1; - } + // Check consistency + checkConsistency(); + + // Check integrity + checkIntegrity(); + + // Print table summary + printTableSummary(); + + return errors.summarize(); } /** - * Checks HDFS and META - * @return true if there were no errors, otherwise return false + * Scan HDFS for all regions, recording their information into + * regionInfo */ - boolean checkHdfs() throws IOException { + void checkHdfs() throws IOException { + Path rootDir = new Path(conf.get(HConstants.HBASE_DIR)); + FileSystem fs = rootDir.getFileSystem(conf); - boolean status = true; // success - - // make a copy of all tables in META - TreeMap regions = new TreeMap(); - for (MetaEntry meta: metaEntries.values()) { - regions.put(meta.getTableDesc().getNameAsString(), meta); - } - // list all tables from HDFS - TreeMap allTableDirs = new TreeMap(); + List tableDirs = Lists.newArrayList(); + + boolean foundVersionFile = false; FileStatus[] files = fs.listStatus(rootDir); - for (int i = 0; files != null && i < files.length; i++) { - allTableDirs.put(files[i].getPath(), files[i]); + for (FileStatus file : files) { + if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) { + foundVersionFile = true; + } else { + tableDirs.add(file); + } } - - // verify that -ROOT-, .META directories exists. 
- Path rdir = new Path(rootDir, Bytes.toString(HConstants.ROOT_TABLE_NAME)); - FileStatus ignore = allTableDirs.remove(rdir); - if (ignore == null) { - status = false; - System.out.print("\nERROR: Path " + rdir + " for ROOT table does not exist."); - } - Path mdir = new Path(rootDir, Bytes.toString(HConstants.META_TABLE_NAME)); - ignore = allTableDirs.remove(mdir); - if (ignore == null) { - status = false; - System.out.print("\nERROR: Path " + mdir + " for META table does not exist."); - } - + // verify that version file exists - Path vfile = new Path(rootDir, HConstants.VERSION_FILE_NAME); - ignore = allTableDirs.remove(vfile); - if (ignore == null) { - status = false; - System.out.print("\nERROR: Version file " + vfile + " does not exist."); + if (!foundVersionFile) { + errors.reportError("Version file does not exist in root dir " + rootDir); } - // filter out all valid regions found in the META - for (HRegionInfo rinfo: metaEntries.values()) { - Path tableDir = HTableDescriptor.getTableDir(rootDir, - rinfo.getTableDesc().getName()); - // Path regionDir = HRegion.getRegionDir(tableDir, rinfo.getEncodedName()); - // if the entry exists in allTableDirs, then remove it from allTableDirs as well - // as from the META tmp list - FileStatus found = allTableDirs.remove(tableDir); - if (found != null) { - regions.remove(tableDir.getName()); - } - } + // level 1: /* + for (FileStatus tableDir : tableDirs) { + String tableName = tableDir.getPath().getName(); + // ignore hidden files + if (tableName.startsWith(".") && + !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME))) + continue; + // level 2: //* + FileStatus[] regionDirs = fs.listStatus(tableDir.getPath()); + for (FileStatus regionDir : regionDirs) { + String encodedName = regionDir.getPath().getName(); - // The remaining entries in allTableDirs do not have entries in .META - // However, if the path name was modified in the last few milliseconds - // as specified by timelag, then do not flag it as an 
inconsistency. - long now = System.currentTimeMillis(); - for (FileStatus region: allTableDirs.values()) { - if (region.getModificationTime() + timelag < now) { - String finalComponent = region.getPath().getName(); - if (!finalComponent.startsWith(".")) { - // ignore .logs and .oldlogs directories - System.out.print("\nERROR: Path " + region.getPath() + - " does not have a corresponding entry in META."); - status = false; + // ignore directories that aren't hexadecimal + if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue; + + HbckInfo hbi = getOrCreateInfo(encodedName); + hbi.foundRegionDir = regionDir; + + // Set a flag if this region contains only edits + // This is special case if a region is left after split + hbi.onlyEdits = true; + FileStatus[] subDirs = fs.listStatus(regionDir.getPath()); + Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath()); + for (FileStatus subDir : subDirs) { + String sdName = subDir.getPath().getName(); + if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) { + hbi.onlyEdits = false; + break; + } } } } - - // the remaining entries in tmp do not have entries in HDFS - for (HRegionInfo rinfo: regions.values()) { - System.out.println("\nERROR: Region " + rinfo.getRegionNameAsString() + - " does not have a corresponding entry in HDFS."); - status = false; + } + + /** + * Record the location of the ROOT region as found in ZooKeeper, + * as if it were in a META table. This is so that we can check + * deployment of ROOT. 
+ */ + boolean recordRootRegion() throws IOException { + HRegionLocation rootLocation = connection.locateRegion( + HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW); + + // Check if Root region is valid and existing + if (rootLocation == null || rootLocation.getRegionInfo() == null || + rootLocation.getServerAddress() == null) { + errors.reportError("Root Region or some of its attributes is null."); + return false; } - return status; + + MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(), + rootLocation.getServerAddress(), null, System.currentTimeMillis()); + HbckInfo hbInfo = new HbckInfo(m); + regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo); + return true; } + /** * Contacts each regionserver and fetches metadata about regions. * @param regionServerList - the list of region servers to connect to * @throws IOException if a remote or network exception occurs - * @return true if there were no errors, otherwise return false */ - boolean processRegionServers(Collection regionServerList) + void processRegionServers(Collection regionServerList) throws IOException { - // make a copy of all entries in META - TreeMap tmp = - new TreeMap(metaEntries); - long errorCount = 0; // number of inconsistencies detected - int showProgress = 0; - // loop to contact each region server - for (HServerInfo rsinfo: regionServerList) { - showProgress++; // one more server. 
+ for (HServerInfo rsinfo:regionServerList) { + errors.progress(); try { HRegionInterface server = connection.getHRegionConnection( rsinfo.getServerAddress()); // list all online regions from this region server HRegionInfo[] regions = server.getRegionsAssignment(); + if (details) { - System.out.print("\nRegionServer:" + rsinfo.getServerName() + - " number of regions:" + regions.length); + errors.detail("\nRegionServer:" + rsinfo.getServerName() + + " number of regions:" + regions.length); for (HRegionInfo rinfo: regions) { - System.out.print("\n\t name:" + rinfo.getRegionNameAsString() + - " id:" + rinfo.getRegionId() + - " encoded name:" + rinfo.getEncodedName() + - " start :" + Bytes.toStringBinary(rinfo.getStartKey()) + - " end :" + Bytes.toStringBinary(rinfo.getEndKey())); + errors.detail("\n\t name:" + rinfo.getRegionNameAsString() + + " id:" + rinfo.getRegionId() + + " encoded name:" + rinfo.getEncodedName() + + " start :" + Bytes.toStringBinary(rinfo.getStartKey()) + + " end :" + Bytes.toStringBinary(rinfo.getEndKey())); } - showProgress = 0; } // check to see if the existance of this region matches the region in META - for (HRegionInfo r: regions) { - MetaEntry metaEntry = metaEntries.get(r); - - // this entry exists in the region server but is not in the META - if (metaEntry == null) { - if (r.isMetaRegion()) { - continue; // this is ROOT or META region - } - System.out.print("\nERROR: Region " + r.getRegionNameAsString() + - " found on server " + rsinfo.getServerAddress() + - " but is not listed in META."); - errorCount++; - showProgress = 0; - continue; - } - if (!metaEntry.regionServer.equals(rsinfo.getServerAddress())) { - System.out.print("\nERROR: Region " + r.getRegionNameAsString() + - " found on server " + rsinfo.getServerAddress() + - " but is listed in META to be on server " + - metaEntry.regionServer); - errorCount++; - showProgress = 0; - } - - // The region server is indeed serving a valid region. 
Remove it from tmp - tmp.remove(r); + for (HRegionInfo r:regions) { + HbckInfo hbi = getOrCreateInfo(r.getEncodedName()); + hbi.deployedOn.add(rsinfo.getServerAddress()); } } catch (IOException e) { // unable to connect to the region server. - if (details) { - System.out.print("\nRegionServer:" + rsinfo.getServerName() + - " Unable to fetch region information. " + e); - } + errors.reportError("RegionServer: " + rsinfo.getServerName() + + " Unable to fetch region information. " + e); } - if (showProgress % 10 == 0) { - System.out.print("."); // show progress to user - showProgress = 0; + } + } + + /** + * Check consistency of all regions that have been found in previous phases. + */ + void checkConsistency() throws IOException { + for (HbckInfo hbi : regionInfo.values()) { + doConsistencyCheck(hbi); + } + } + + /** + * Check a single region for consistency and correct deployment. + */ + void doConsistencyCheck(HbckInfo hbi) throws IOException { + String descriptiveName = hbi.toString(); + + boolean inMeta = hbi.metaEntry != null; + boolean inHdfs = hbi.foundRegionDir != null; + boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null; + boolean isDeployed = !hbi.deployedOn.isEmpty(); + boolean isMultiplyDeployed = hbi.deployedOn.size() > 1; + boolean deploymentMatchesMeta = + hasMetaAssignment && isDeployed && !isMultiplyDeployed && + hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0)); + boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline(); + boolean recentlyModified = hbi.foundRegionDir != null && + hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis(); + + // ========== First the healthy cases ============= + if (hbi.onlyEdits) { + return; + } + if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) { + return; + } else if (inMeta && !shouldBeDeployed && !isDeployed) { + // offline regions shouldn't cause complaints + LOG.debug("Region " + descriptiveName + " offline, ignoring."); 
+ return; + } else if (recentlyModified) { + LOG.info("Region " + descriptiveName + " was recently modified -- skipping"); + return; + } + // ========== Cases where the region is not in META ============= + else if (!inMeta && !inHdfs && !isDeployed) { + // We shouldn't have record of this region at all then! + assert false : "Entry for region with no data"; + } else if (!inMeta && !inHdfs && isDeployed) { + errors.reportError("Region " + descriptiveName + " not on HDFS or in META but " + + "deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + } else if (!inMeta && inHdfs && !isDeployed) { + errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " + + "or deployed on any region server."); + } else if (!inMeta && inHdfs && isDeployed) { + errors.reportError("Region " + descriptiveName + " not in META, but deployed on " + + Joiner.on(", ").join(hbi.deployedOn)); + + // ========== Cases where the region is in META ============= + } else if (inMeta && !inHdfs && !isDeployed) { + errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " + + "or deployed on any region server."); + } else if (inMeta && !inHdfs && isDeployed) { + errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " + + "and deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) { + errors.reportError("Region " + descriptiveName + " not deployed on any region server."); + // If we are trying to fix the errors + if (shouldFix()) { + errors.print("Trying to fix unassigned region..."); + setShouldRerun(); + HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry); } + } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) { + errors.reportError("Region " + descriptiveName + " has should not be deployed according " + + "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + } else if (inMeta && inHdfs && isMultiplyDeployed) { + 
errors.reportError("Region " + descriptiveName + " is listed in META on region server " + + hbi.metaEntry.regionServer + " but is multiply assigned to region servers " + + Joiner.on(", ").join(hbi.deployedOn)); + // If we are trying to fix the errors + if (shouldFix()) { + errors.print("Trying to fix assignment error..."); + setShouldRerun(); + HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn); + } + } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) { + errors.reportError("Region " + descriptiveName + " listed in META on region server " + + hbi.metaEntry.regionServer + " but found on region server " + + hbi.deployedOn.get(0)); + // If we are trying to fix the errors + if (shouldFix()) { + errors.print("Trying to fix assignment error..."); + setShouldRerun(); + HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn); + } + } else { + errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" + + " inMeta=" + inMeta + + " inHdfs=" + inHdfs + + " isDeployed=" + isDeployed + + " isMultiplyDeployed=" + isMultiplyDeployed + + " deploymentMatchesMeta=" + deploymentMatchesMeta + + " shouldBeDeployed=" + shouldBeDeployed); } + } - // all the region left in tmp are not found on any region server - for (MetaEntry metaEntry: tmp.values()) { - // An offlined region will not be present out on a regionserver. A region - // is offlined if table is offlined -- will still have an entry in .META. - // of a region is offlined because its a parent region and its daughters - // still have references. - if (metaEntry.isOffline()) continue; - System.out.print("\nERROR: Region " + metaEntry.getRegionNameAsString() + - " is not served by any region server " + - " but is listed in META to be on server " + - metaEntry.regionServer); - errorCount++; + /** + * Checks tables integrity. Goes over all regions and scans the tables. 
+ * Collects all the pieces for each table and checks if there are missing, + * repeated or overlapping ones. + */ + void checkIntegrity() { + for (HbckInfo hbi : regionInfo.values()) { + // Check only valid, working regions + if (hbi.metaEntry == null) continue; + if (hbi.metaEntry.regionServer == null) continue; + if (hbi.foundRegionDir == null) continue; + if (hbi.deployedOn.size() != 1) continue; + if (hbi.onlyEdits) continue; + + // We should be safe here + String tableName = hbi.metaEntry.getTableDesc().getNameAsString(); + TInfo modTInfo = tablesInfo.get(tableName); + if (modTInfo == null) { + modTInfo = new TInfo(tableName); + } + for (HServerAddress server : hbi.deployedOn) { + modTInfo.addServer(server); + } + modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey()); + tablesInfo.put(tableName, modTInfo); } + + for (TInfo tInfo : tablesInfo.values()) { + if (!tInfo.check()) { + errors.reportError("Found inconsistency in table " + tInfo.getName()); + } + } + } - if (errorCount > 0) { - System.out.println("\nDetected " + errorCount + " inconsistencies. " + - "This might not indicate a real problem because these regions " + - "could be in the midst of a split. Consider re-running with a " + - "larger value of -timelag."); - return false; + /** + * Maintain information about a particular table. 
+ */ + private class TInfo { + String tableName; + TreeMap edges; + TreeSet deployedOn; + + TInfo(String name) { + this.tableName = name; + edges = new TreeMap (Bytes.BYTES_COMPARATOR); + deployedOn = new TreeSet (); } - return true; // no errors + + public void addEdge(byte[] fromNode, byte[] toNode) { + this.edges.put(fromNode, toNode); + } + + public void addServer(HServerAddress server) { + this.deployedOn.add(server); + } + + public String getName() { + return tableName; + } + + public int getNumRegions() { + return edges.size(); + } + + public boolean check() { + byte[] last = new byte[0]; + byte[] next = new byte[0]; + TreeSet visited = new TreeSet(Bytes.BYTES_COMPARATOR); + // Each table should start with a zero-length byte[] and end at a + // zero-length byte[]. Just follow the edges to see if this is true + while (true) { + // Check if chain is broken + if (!edges.containsKey(last)) { + errors.detail("Chain of regions in table " + tableName + + " is broken."); + return false; + } + next = edges.get(last); + // Found a cycle + if (visited.contains(next)) { + errors.detail("Chain of regions in table " + tableName + + " has a cycle."); + return false; + } + // Mark next node as visited + visited.add(next); + // If next is zero-length byte[] we are possibly at the end of the chain + if (next.length == 0) { + // If we have visited all elements we are fine + if (edges.size() != visited.size()) { + errors.detail("Chain of regions in table " + tableName + + " contains less elements than are listed in META."); + return false; + } + return true; + } + last = next; + } + // How did we get here? 
+ } } + /** * Return a list of table names whose metadata have not been modified in the * last few milliseconds specified by timelag @@ -349,18 +520,17 @@ * @return tables that have not been modified recently * @throws IOException if an error is encountered */ - HTableDescriptor[] getTables(final TreeMap regionList, - AtomicInteger numSkipped) { + HTableDescriptor[] getTables(AtomicInteger numSkipped) { TreeSet uniqueTables = new TreeSet(); long now = System.currentTimeMillis(); - for (MetaEntry m: regionList.values()) { - HRegionInfo info = m; + for (HbckInfo hbi : regionInfo.values()) { + MetaEntry info = hbi.metaEntry; // if the start key is zero, then we have found the first region of a table. // pick only those tables that were not modified in the last few milliseconds. if (info != null && info.getStartKey().length == 0) { - if (m.modTime + timelag < now) { + if (info.modTime + timelag < now) { uniqueTables.add(info.getTableDesc()); } else { numSkipped.incrementAndGet(); // one more in-flux table @@ -371,71 +541,145 @@ } /** - * Scan META. Returns a list of all regions of all known tables. - * @param regionList - fill up all entries found in .META - * @throws IOException if an error is encountered + * Gets the entry in regionInfo corresponding to the the given encoded + * region name. If the region has not been seen yet, a new entry is added + * and returned. */ - void getMetaEntries(final TreeMap regionList) throws IOException { - MetaScannerVisitor visitor = new MetaScannerVisitor() { - int countRecord = 1; + private HbckInfo getOrCreateInfo(String name) { + HbckInfo hbi = regionInfo.get(name); + if (hbi == null) { + hbi = new HbckInfo(null); + regionInfo.put(name, hbi); + } + return hbi; + } - // comparator to sort KeyValues with latest modtime - final Comparator comp = new Comparator() { - public int compare(KeyValue k1, KeyValue k2) { - return (int)(k1.getTimestamp() - k2.getTimestamp()); + /** + * Check values in regionInfo for .META. 
+ * Check if zero or more than one regions with META are found. + * If there are inconsistencies (i.e. zero or more than one regions + * pretend to be holding the .META.) try to fix that and report an error. + * @throws IOException from HBaseFsckRepair functions + */ + boolean checkMetaEntries() throws IOException { + List metaRegions = Lists.newArrayList(); + for (HbckInfo value : regionInfo.values()) { + if (value.metaEntry.isMetaTable()) { + metaRegions.add(value); + } + } + + // If something is wrong + if (metaRegions.size() != 1) { + HRegionLocation rootLocation = connection.locateRegion( + HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW); + HbckInfo root = + regionInfo.get(rootLocation.getRegionInfo().getEncodedName()); + + // If there is no region holding .META. + if (metaRegions.size() == 0) { + errors.reportError(".META. is not found on any region."); + if (shouldFix()) { + errors.print("Trying to fix a problem with .META..."); + setShouldRerun(); + // try to fix it (treat it as unassigned region) + HBaseFsckRepair.fixUnassigned(conf, root.metaEntry); + } + } + // If there are more than one regions pretending to hold the .META. + else if (metaRegions.size() > 1) { + errors.reportError(".META. is found on more than one region."); + if (shouldFix()) { + errors.print("Trying to fix a problem with .META..."); + setShouldRerun(); + // try fix it (treat is a dupe assignment) + List deployedOn = Lists.newArrayList(); + for (HbckInfo mRegion : metaRegions) { + deployedOn.add(mRegion.metaEntry.regionServer); } - }; + HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn); + } + } + // rerun hbck with hopefully fixed META + return false; + } + // no errors, so continue normally + return true; + } - public boolean processRow(Result result) throws IOException { - try { + /** + * Scan .META. and -ROOT-, adding all regions found to the regionInfo map. 
+ * @throws IOException if an error is encountered + */ + void getMetaEntries() throws IOException { + MetaScannerVisitor visitor = new MetaScannerVisitor() { + int countRecord = 1; - // record the latest modification of this META record - long ts = Collections.max(result.list(), comp).getTimestamp(); + // comparator to sort KeyValues with latest modtime + final Comparator comp = new Comparator() { + public int compare(KeyValue k1, KeyValue k2) { + return (int)(k1.getTimestamp() - k2.getTimestamp()); + } + }; - // record region details - byte[] value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.REGIONINFO_QUALIFIER); - HRegionInfo info = null; - HServerAddress server = null; - byte[] startCode = null; - if (value != null) { - info = Writables.getHRegionInfo(value); - } + public boolean processRow(Result result) throws IOException { + try { - // record assigned region server - value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.SERVER_QUALIFIER); - if (value != null && value.length > 0) { - String address = Bytes.toString(value); - server = new HServerAddress(address); - } + // record the latest modification of this META record + long ts = Collections.max(result.list(), comp).getTimestamp(); - // record region's start key - value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.STARTCODE_QUALIFIER); - if (value != null) { - startCode = value; - } - MetaEntry m = new MetaEntry(info, server, startCode, ts); - m = regionList.put(m ,m); - if (m != null) { - throw new IOException("Two entries in META are same " + m); - } + // record region details + byte[] value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.REGIONINFO_QUALIFIER); + HRegionInfo info = null; + HServerAddress server = null; + byte[] startCode = null; + if (value != null) { + info = Writables.getHRegionInfo(value); + } - // show proof of progress to the user, once for every 100 records. 
- if (countRecord % 100 == 0) { - System.out.print("."); - } - countRecord++; - return true; - } catch (RuntimeException e) { - LOG.error("Result=" + result); - throw e; + // record assigned region server + value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.SERVER_QUALIFIER); + if (value != null && value.length > 0) { + String address = Bytes.toString(value); + server = new HServerAddress(address); } + + // record region's start key + value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER); + if (value != null) { + startCode = value; + } + MetaEntry m = new MetaEntry(info, server, startCode, ts); + HbckInfo hbInfo = new HbckInfo(m); + HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo); + if (previous != null) { + throw new IOException("Two entries in META are same " + previous); + } + + // show proof of progress to the user, once for every 100 records. + if (countRecord % 100 == 0) { + errors.progress(); + } + countRecord++; + return true; + } catch (RuntimeException e) { + LOG.error("Result=" + result); + throw e; } - }; - MetaScanner.metaScan(conf, visitor); - System.out.println(""); + } + }; + + // Scan -ROOT- to pick up META regions + MetaScanner.metaScan(conf, visitor, + HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW, null, + Integer.MAX_VALUE); + + // Scan .META. to pick up user regions + MetaScanner.metaScan(conf, visitor); + errors.print(""); } /** @@ -443,27 +687,159 @@ */ private static class MetaEntry extends HRegionInfo { HServerAddress regionServer; // server hosting this region - byte[] startCode; // start value of region long modTime; // timestamp of most recent modification metadata public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer, byte[] startCode, long modTime) { super(rinfo); this.regionServer = regionServer; - this.startCode = startCode; this.modTime = modTime; } } /** - * Display the full report from fsck. 
This displays all live and dead region servers , - * and all known regions. + * Maintain information about a particular region. */ + static class HbckInfo { + boolean onlyEdits = false; + MetaEntry metaEntry = null; + FileStatus foundRegionDir = null; + List deployedOn = Lists.newArrayList(); + + HbckInfo(MetaEntry metaEntry) { + this.metaEntry = metaEntry; + } + + public String toString() { + if (metaEntry != null) { + return metaEntry.getRegionNameAsString(); + } else if (foundRegionDir != null) { + return foundRegionDir.getPath().toString(); + } else { + return "unknown region on " + Joiner.on(", ").join(deployedOn); + } + } + } + + /** + * Prints summary of all tables found on the system. + */ + private void printTableSummary() { + System.out.println("Summary:"); + for (TInfo tInfo : tablesInfo.values()) { + if (tInfo.check()) { + System.out.println("Table " + tInfo.getName() + " is okay."); + } + else { + System.out.println("Table " + tInfo.getName() + " is inconsistent."); + } + System.out.println(" -- number of regions: " + tInfo.getNumRegions()); + System.out.print(" -- deployed on:"); + for (HServerAddress server : tInfo.deployedOn) { + System.out.print(" " + server.toString()); + } + System.out.println("\n"); + } + } + + interface ErrorReporter { + public void reportError(String message); + public int summarize(); + public void detail(String details); + public void progress(); + public void print(String message); + } + + private static class PrintingErrorReporter implements ErrorReporter { + public int errorCount = 0; + private int showProgress; + + public void reportError(String message) { + if (!summary) { + System.out.println("ERROR: " + message); + } + errorCount++; + showProgress = 0; + } + + public int summarize() { + System.out.println(Integer.toString(errorCount) + + " inconsistencies detected."); + if (errorCount == 0) { + System.out.println("Status: OK"); + return 0; + } else { + System.out.println("Status: INCONSISTENT"); + return -1; + } + } + 
+    public void print(String message) {
+      if (!summary) {
+        System.out.println(message);
+      }
+    }
+
+    public void detail(String message) {
+      if (details) {
+        System.out.println(message);
+      }
+      showProgress = 0;
+    }
+
+    public void progress() {
+      if (showProgress++ == 10) {
+        if (!summary) {
+          System.out.print(".");
+        }
+        showProgress = 0;
+      }
+    }
+  }
+
+  /**
+   * Display the full report from fsck.
+   * This displays all live and dead region servers, and all known regions.
+   */
   void displayFullReport() {
     details = true;
   }
 
   /**
+   * Set summary mode.
+   * Print only summary of the tables and status (OK or INCONSISTENT)
+   */
+  void setSummary() {
+    summary = true;
+  }
+
+  /**
+   * Check if we should rerun fsck again. This checks if we've tried to
+   * fix something and we should rerun fsck tool again.
+   * Used after a repair attempt so the tool verifies the cluster state
+   * again instead of reporting against stale information.
+   */
+  void setShouldRerun() {
+    rerun = true;
+  }
+
+  boolean shouldRerun() {
+    return rerun;
+  }
+
+  /**
+   * Fix inconsistencies found by fsck. This should try to fix errors (if any)
+   * found by fsck utility.
+ */ + void setFixErrors() { + fix = true; + } + + boolean shouldFix() { + return fix; + } + + /** * We are interested in only those tables that have not changed their state in * META during the last few seconds specified by hbase.admin.fsck.timelag * @param seconds - the time in seconds @@ -479,6 +855,8 @@ System.err.println(" -timelag {timeInSeconds} Process only regions that " + " have not experienced any metadata updates in the last " + " {{timeInSeconds} seconds."); + System.err.println(" -fix Try to fix some of the errors."); + System.err.println(" -summary Print only summary of the tables and status."); Runtime.getRuntime().exit(-2); } @@ -512,6 +890,10 @@ printUsageAndExit(); } i++; + } else if (cmd.equals("-fix")) { + fsck.setFixErrors(); + } else if (cmd.equals("-summary")) { + fsck.setSummary(); } else { String str = "Unknown command line option : " + cmd; LOG.info(str); @@ -521,6 +903,14 @@ } // do the real work of fsck int code = fsck.doWork(); + // If we have changed the HBase state it is better to run fsck again + // to see if we haven't broken something else in the process. + // We run it only once more because otherwise we can easily fall into + // an infinite loop. + if (fsck.shouldRerun()) { + code = fsck.doWork(); + } + Runtime.getRuntime().exit(code); } } Index: src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java (revision 0) @@ -0,0 +1,114 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.ipc.HMasterInterface; +import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; +import org.apache.zookeeper.KeeperException; + +public class HBaseFsckRepair { + + public static void fixDupeAssignment(Configuration conf, HRegionInfo region, + List servers) + throws IOException { + + HRegionInfo actualRegion = new HRegionInfo(region); + + // Clear status in master and zk + clearInMaster(conf, actualRegion); + clearInZK(conf, actualRegion); + + // Close region on the servers + for(HServerAddress server : servers) { + closeRegion(conf, server, actualRegion); + } + + // It's unassigned so fix it as such + fixUnassigned(conf, actualRegion); + } + + public static void fixUnassigned(Configuration conf, HRegionInfo region) + throws IOException { + + HRegionInfo actualRegion = new HRegionInfo(region); + + // Clear status in master and zk + clearInMaster(conf, actualRegion); + clearInZK(conf, actualRegion); + + // Clear assignment in META or ROOT + clearAssignment(conf, actualRegion); + } + + private static void 
clearInMaster(Configuration conf, HRegionInfo region) + throws IOException { + System.out.println("Region being cleared in master: " + region); + HMasterInterface master = HConnectionManager.getConnection(conf).getMaster(); + long masterVersion = + master.getProtocolVersion("org.apache.hadoop.hbase.ipc.HMasterInterface", 25); + System.out.println("Master protocol version: " + masterVersion); + try { + master.clearFromTransition(region); + } catch (Exception e) {} + } + + private static void clearInZK(Configuration conf, HRegionInfo region) + throws IOException { + ZooKeeperWrapper zkw = HConnectionManager.getConnection(conf).getZooKeeperWrapper(); +// try { + zkw.deleteUnassignedRegion(region.getEncodedName()); +// } catch(KeeperException ke) {} + } + + private static void closeRegion(Configuration conf, HServerAddress server, + HRegionInfo region) + throws IOException { + HRegionInterface rs = + HConnectionManager.getConnection(conf).getHRegionConnection(server); + rs.closeRegion(region, false); + } + + private static void clearAssignment(Configuration conf, + HRegionInfo region) + throws IOException { + HTable ht = null; + if (region.isMetaTable()) { + // Clear assignment in ROOT + ht = new HTable(conf, HConstants.ROOT_TABLE_NAME); + } + else { + // Clear assignment in META + ht = new HTable(conf, HConstants.META_TABLE_NAME); + } + Delete del = new Delete(region.getRegionName()); + del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); + del.deleteColumns(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER); + ht.delete(del); + } +}