Index: src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java (revision 1030293) +++ src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java (working copy) @@ -82,6 +82,13 @@ } @Test + public void testHBaseFsck() throws IOException { + HBaseFsck fsck = new HBaseFsck(TEST_UTIL.getConfiguration()); + fsck.displayFullReport(); + int result = fsck.doWork(); + } + + @Test public void testCreateTable() throws IOException { HTableDescriptor [] tables = admin.listTables(); int numTables = tables.length; Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1030293) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -1974,16 +1974,20 @@ @Override public boolean closeRegion(HRegionInfo region) throws NotServingRegionException { + return closeRegion(region, true); + } + + @Override + public boolean closeRegion(HRegionInfo region, final boolean zk) + throws NotServingRegionException { LOG.info("Received close region: " + region.getRegionNameAsString()); - // TODO: Need to check if this is being served here but currently undergoing - // a split (so master needs to retry close after split is complete) if (!onlineRegions.containsKey(region.getEncodedName())) { LOG.warn("Received close for region we are not serving; " + region.getEncodedName()); throw new NotServingRegionException("Received close for " + region.getRegionNameAsString() + " but we are not serving it"); } - return closeRegion(region, false, true); + return closeRegion(region, false, zk); } /** Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- 
src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1030293) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -1326,6 +1326,34 @@ } /** + * Clears the specified region from being in transition. + *

+ * Used only by HBCK tool. + * @param hri + */ + public void clearRegionFromTransition(HRegionInfo hri) { + synchronized (this.regionsInTransition) { + this.regionsInTransition.remove(hri.getEncodedName()); + } + synchronized (this.regions) { + this.regions.remove(hri); + } + synchronized (this.regionPlans) { + this.regionPlans.remove(hri.getEncodedName()); + } + synchronized (this.servers) { + for (List regions : this.servers.values()) { + for (int i=0;i metaEntries; - private boolean details = false; // do we display the full report? + private TreeMap regionInfo = new TreeMap(); + private TreeMap tablesInfo = new TreeMap(); + ErrorReporter errors = new PrintingErrorReporter(); + + private static boolean details = false; // do we display the full report private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older + private boolean fix = false; // do we want to try fixing the errors? + private boolean rerun = false; // if we tried to fix something rerun hbck + private static boolean summary = false; // if we want to print less output /** * Constructor @@ -79,19 +87,11 @@ */ public HBaseFsck(Configuration conf) throws MasterNotRunningException, ZooKeeperConnectionException, IOException { - super(conf); this.conf = conf; - // setup filesystem properties - this.rootDir = new Path(conf.get(HConstants.HBASE_DIR)); - this.fs = rootDir.getFileSystem(conf); - - - // fetch information from master - master = getMaster(); - status = master.getClusterStatus(); - connection = getConnection(); - this.metaEntries = new TreeMap(); + HBaseAdmin admin = new HBaseAdmin(conf); + status = admin.getMaster().getClusterStatus(); + connection = admin.getConnection(); } /** @@ -101,23 +101,39 @@ */ int doWork() throws IOException { // print hbase server version - System.out.println("Version: " + status.getHBaseVersion()); + errors.print("Version: " + status.getHBaseVersion()); + // Make sure regionInfo is empty before starting + regionInfo.clear(); + tablesInfo.clear(); + // 
get a list of all regions from the master. This involves // scanning the META table - getMetaEntries(metaEntries); + if (!recordRootRegion()) { + // Will remove later if we can fix it + errors.reportError("Encountered fatal error. Exitting..."); + return -1; + } + getMetaEntries(); + // Check if .META. is found only once and on the right place + if (!checkMetaEntries()) { + // Will remove later if we can fix it + errors.reportError("Encountered fatal error. Exitting..."); + return -1; + } + // get a list of all tables that have not changed recently. AtomicInteger numSkipped = new AtomicInteger(0); - HTableDescriptor[] allTables = getTables(metaEntries, numSkipped); - System.out.println("Number of Tables: " + allTables.length); + HTableDescriptor[] allTables = getTables(numSkipped); + errors.print("Number of Tables: " + allTables.length); if (details) { if (numSkipped.get() > 0) { - System.out.println("\n Number of Tables in flux: " + numSkipped.get()); + errors.detail("\n Number of Tables in flux: " + numSkipped.get()); } for (HTableDescriptor td : allTables) { String tableName = td.getNameAsString(); - System.out.println("\t Table: " + tableName + "\t" + + errors.detail("\t Table: " + tableName + "\t" + (td.isReadOnly() ? "ro" : "rw") + "\t" + (td.isRootRegion() ? "ROOT" : (td.isMetaRegion() ? 
"META" : " ")) + "\t" + @@ -127,138 +143,135 @@ // From the master, get a list of all known live region servers Collection regionServers = status.getServerInfo(); - System.out.println("Number of live region servers:" + + errors.print("Number of live region servers:" + regionServers.size()); if (details) { for (HServerInfo rsinfo: regionServers) { - System.out.println("\t RegionServer:" + rsinfo.getServerName()); + errors.print("\t RegionServer:" + rsinfo.getServerName()); } } // From the master, get a list of all dead region servers Collection deadRegionServers = status.getDeadServerNames(); - System.out.println("Number of dead region servers:" + + errors.print("Number of dead region servers:" + deadRegionServers.size()); if (details) { for (String name: deadRegionServers) { - System.out.println("\t RegionServer(dead):" + name); + errors.print("\t RegionServer(dead):" + name); } } - // process information from all region servers - boolean status1 = processRegionServers(regionServers); + // Determine what's deployed + processRegionServers(regionServers); - // match HDFS with META - boolean status2 = checkHdfs(); + // Determine what's on HDFS + checkHdfs(); - if (status1 == true && status2 == true) { - System.out.println("\nRest easy, buddy! HBase is clean. 
"); - return 0; - } else { - System.out.println("\nInconsistencies detected."); - return -1; - } + // Check consistency + checkConsistency(); + + // Check integrity + checkIntegrity(); + + // Print table summary + printTableSummary(); + + return errors.summarize(); } /** - * Checks HDFS and META - * @return true if there were no errors, otherwise return false + * Scan HDFS for all regions, recording their information into + * regionInfo */ - boolean checkHdfs() throws IOException { + void checkHdfs() throws IOException { + Path rootDir = new Path(conf.get(HConstants.HBASE_DIR)); + FileSystem fs = rootDir.getFileSystem(conf); - boolean status = true; // success + // list all tables from HDFS + List tableDirs = Lists.newArrayList(); - // make a copy of all tables in META - TreeMap regions = new TreeMap(); - for (MetaEntry meta: metaEntries.values()) { - regions.put(meta.getTableDesc().getNameAsString(), meta); - } - - // list all tables from HDFS - TreeMap allTableDirs = new TreeMap(); + boolean foundVersionFile = false; FileStatus[] files = fs.listStatus(rootDir); - for (int i = 0; files != null && i < files.length; i++) { - allTableDirs.put(files[i].getPath(), files[i]); + for (FileStatus file : files) { + if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) { + foundVersionFile = true; + } else { + tableDirs.add(file); + } } - // verify that -ROOT-, .META directories exists. 
- Path rdir = new Path(rootDir, Bytes.toString(HConstants.ROOT_TABLE_NAME)); - FileStatus ignore = allTableDirs.remove(rdir); - if (ignore == null) { - status = false; - System.out.print("\nERROR: Path " + rdir + " for ROOT table does not exist."); - } - Path mdir = new Path(rootDir, Bytes.toString(HConstants.META_TABLE_NAME)); - ignore = allTableDirs.remove(mdir); - if (ignore == null) { - status = false; - System.out.print("\nERROR: Path " + mdir + " for META table does not exist."); - } - // verify that version file exists - Path vfile = new Path(rootDir, HConstants.VERSION_FILE_NAME); - ignore = allTableDirs.remove(vfile); - if (ignore == null) { - status = false; - System.out.print("\nERROR: Version file " + vfile + " does not exist."); + if (!foundVersionFile) { + errors.reportError("Version file does not exist in root dir " + rootDir); } - // filter out all valid regions found in the META - for (HRegionInfo rinfo: metaEntries.values()) { - Path tableDir = HTableDescriptor.getTableDir(rootDir, - rinfo.getTableDesc().getName()); - // Path regionDir = HRegion.getRegionDir(tableDir, rinfo.getEncodedName()); - // if the entry exists in allTableDirs, then remove it from allTableDirs as well - // as from the META tmp list - FileStatus found = allTableDirs.remove(tableDir); - if (found != null) { - regions.remove(tableDir.getName()); - } - } + // level 1: /* + for (FileStatus tableDir : tableDirs) { + String tableName = tableDir.getPath().getName(); + // ignore hidden files + if (tableName.startsWith(".") && + !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME))) + continue; + // level 2: //* + FileStatus[] regionDirs = fs.listStatus(tableDir.getPath()); + for (FileStatus regionDir : regionDirs) { + String encodedName = regionDir.getPath().getName(); + // ignore directories that aren't hexadecimal + if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue; - // The remaining entries in allTableDirs do not have entries in .META - // However, if the 
path name was modified in the last few milliseconds - // as specified by timelag, then do not flag it as an inconsistency. - long now = System.currentTimeMillis(); - for (FileStatus region: allTableDirs.values()) { - if (region.getModificationTime() + timelag < now) { - String finalComponent = region.getPath().getName(); - if (!finalComponent.startsWith(".")) { - // ignore .logs and .oldlogs directories - System.out.print("\nERROR: Path " + region.getPath() + - " does not have a corresponding entry in META."); - status = false; + HbckInfo hbi = getOrCreateInfo(encodedName); + hbi.foundRegionDir = regionDir; + + // Set a flag if this region contains only edits + // This is special case if a region is left after split + hbi.onlyEdits = true; + FileStatus[] subDirs = fs.listStatus(regionDir.getPath()); + Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath()); + for (FileStatus subDir : subDirs) { + String sdName = subDir.getPath().getName(); + if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) { + hbi.onlyEdits = false; + break; + } } } } + } - // the remaining entries in tmp do not have entries in HDFS - for (HRegionInfo rinfo: regions.values()) { - System.out.println("\nERROR: Region " + rinfo.getRegionNameAsString() + - " does not have a corresponding entry in HDFS."); - status = false; + /** + * Record the location of the ROOT region as found in ZooKeeper, + * as if it were in a META table. This is so that we can check + * deployment of ROOT. 
+ */ + boolean recordRootRegion() throws IOException { + HRegionLocation rootLocation = connection.locateRegion( + HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW); + + // Check if Root region is valid and existing + if (rootLocation == null || rootLocation.getRegionInfo() == null || + rootLocation.getServerAddress() == null) { + errors.reportError("Root Region or some of its attributes is null."); + return false; } - return status; + + MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(), + rootLocation.getServerAddress(), null, System.currentTimeMillis()); + HbckInfo hbInfo = new HbckInfo(m); + regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo); + return true; } - + /** * Contacts each regionserver and fetches metadata about regions. * @param regionServerList - the list of region servers to connect to * @throws IOException if a remote or network exception occurs - * @return true if there were no errors, otherwise return false */ - boolean processRegionServers(Collection regionServerList) + void processRegionServers(Collection regionServerList) throws IOException { - // make a copy of all entries in META - TreeMap tmp = - new TreeMap(metaEntries); - long errorCount = 0; // number of inconsistencies detected - int showProgress = 0; - // loop to contact each region server for (HServerInfo rsinfo: regionServerList) { - showProgress++; // one more server. 
+ errors.progress(); try { HRegionInterface server = connection.getHRegionConnection( rsinfo.getServerAddress()); @@ -266,85 +279,238 @@ // list all online regions from this region server NavigableSet regions = server.getOnlineRegions(); if (details) { - System.out.print("\nRegionServer:" + rsinfo.getServerName() + + errors.detail("\nRegionServer:" + rsinfo.getServerName() + " number of regions:" + regions.size()); for (HRegionInfo rinfo: regions) { - System.out.print("\n\t name:" + rinfo.getRegionNameAsString() + + errors.detail("\n\t name:" + rinfo.getRegionNameAsString() + " id:" + rinfo.getRegionId() + " encoded name:" + rinfo.getEncodedName() + " start :" + Bytes.toStringBinary(rinfo.getStartKey()) + " end :" + Bytes.toStringBinary(rinfo.getEndKey())); } - showProgress = 0; } // check to see if the existance of this region matches the region in META - for (HRegionInfo r: regions) { - MetaEntry metaEntry = metaEntries.get(r); - - // this entry exists in the region server but is not in the META - if (metaEntry == null) { - if (r.isMetaRegion()) { - continue; // this is ROOT or META region - } - System.out.print("\nERROR: Region " + r.getRegionNameAsString() + - " found on server " + rsinfo.getServerAddress() + - " but is not listed in META."); - errorCount++; - showProgress = 0; - continue; - } - if (!metaEntry.regionServer.equals(rsinfo.getServerAddress())) { - System.out.print("\nERROR: Region " + r.getRegionNameAsString() + - " found on server " + rsinfo.getServerAddress() + - " but is listed in META to be on server " + - metaEntry.regionServer); - errorCount++; - showProgress = 0; - } - - // The region server is indeed serving a valid region. Remove it from tmp - tmp.remove(r); + for (HRegionInfo r:regions) { + HbckInfo hbi = getOrCreateInfo(r.getEncodedName()); + hbi.deployedOn.add(rsinfo.getServerAddress()); } } catch (IOException e) { // unable to connect to the region server. 
- if (details) { - System.out.print("\nRegionServer:" + rsinfo.getServerName() + + errors.reportError("\nRegionServer:" + rsinfo.getServerName() + " Unable to fetch region information. " + e); - } } - if (showProgress % 10 == 0) { - System.out.print("."); // show progress to user - showProgress = 0; + } + } + + /** + * Check consistency of all regions that have been found in previous phases. + */ + void checkConsistency() throws IOException { + for (HbckInfo hbi : regionInfo.values()) { + doConsistencyCheck(hbi); + } + } + + /** + * Check a single region for consistency and correct deployment. + */ + void doConsistencyCheck(HbckInfo hbi) throws IOException { + String descriptiveName = hbi.toString(); + + boolean inMeta = hbi.metaEntry != null; + boolean inHdfs = hbi.foundRegionDir != null; + boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null; + boolean isDeployed = !hbi.deployedOn.isEmpty(); + boolean isMultiplyDeployed = hbi.deployedOn.size() > 1; + boolean deploymentMatchesMeta = + hasMetaAssignment && isDeployed && !isMultiplyDeployed && + hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0)); + boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline(); + boolean recentlyModified = hbi.foundRegionDir != null && + hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis(); + + // ========== First the healthy cases ============= + if (hbi.onlyEdits) { + return; + } + if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) { + return; + } else if (inMeta && !shouldBeDeployed && !isDeployed) { + // offline regions shouldn't cause complaints + LOG.debug("Region " + descriptiveName + " offline, ignoring."); + return; + } else if (recentlyModified) { + LOG.info("Region " + descriptiveName + " was recently modified -- skipping"); + return; + } + // ========== Cases where the region is not in META ============= + else if (!inMeta && !inHdfs && !isDeployed) { + // We shouldn't have record of this 
region at all then! + assert false : "Entry for region with no data"; + } else if (!inMeta && !inHdfs && isDeployed) { + errors.reportError("Region " + descriptiveName + " not on HDFS or in META but " + + "deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + } else if (!inMeta && inHdfs && !isDeployed) { + errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " + + "or deployed on any region server."); + } else if (!inMeta && inHdfs && isDeployed) { + errors.reportError("Region " + descriptiveName + " not in META, but deployed on " + + Joiner.on(", ").join(hbi.deployedOn)); + + // ========== Cases where the region is in META ============= + } else if (inMeta && !inHdfs && !isDeployed) { + errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " + + "or deployed on any region server."); + } else if (inMeta && !inHdfs && isDeployed) { + errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " + + "and deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) { + errors.reportError("Region " + descriptiveName + " not deployed on any region server."); + // If we are trying to fix the errors + if (shouldFix()) { + errors.print("Trying to fix unassigned region..."); + setShouldRerun(); + HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry); } + } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) { + errors.reportError("Region " + descriptiveName + " has should not be deployed according " + + "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + } else if (inMeta && inHdfs && isMultiplyDeployed) { + errors.reportError("Region " + descriptiveName + " is listed in META on region server " + + hbi.metaEntry.regionServer + " but is multiply assigned to region servers " + + Joiner.on(", ").join(hbi.deployedOn)); + // If we are trying to fix the errors + if (shouldFix()) { + errors.print("Trying 
to fix assignment error..."); + setShouldRerun(); + HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn); + } + } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) { + errors.reportError("Region " + descriptiveName + " listed in META on region server " + + hbi.metaEntry.regionServer + " but found on region server " + + hbi.deployedOn.get(0)); + // If we are trying to fix the errors + if (shouldFix()) { + errors.print("Trying to fix assignment error..."); + setShouldRerun(); + HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn); + } + } else { + errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" + + " inMeta=" + inMeta + + " inHdfs=" + inHdfs + + " isDeployed=" + isDeployed + + " isMultiplyDeployed=" + isMultiplyDeployed + + " deploymentMatchesMeta=" + deploymentMatchesMeta + + " shouldBeDeployed=" + shouldBeDeployed); } + } - // all the region left in tmp are not found on any region server - for (MetaEntry metaEntry: tmp.values()) { - // An offlined region will not be present out on a regionserver. A region - // is offlined if table is offlined -- will still have an entry in .META. - // of a region is offlined because its a parent region and its daughters - // still have references. - if (metaEntry.isOffline()) continue; - System.out.print("\nERROR: Region " + metaEntry.getRegionNameAsString() + - " is not served by any region server " + - " but is listed in META to be on server " + - metaEntry.regionServer); - errorCount++; + /** + * Checks tables integrity. Goes over all regions and scans the tables. + * Collects all the pieces for each table and checks if there are missing, + * repeated or overlapping ones. 
+ */ + void checkIntegrity() { + for (HbckInfo hbi : regionInfo.values()) { + // Check only valid, working regions + if (hbi.metaEntry == null) continue; + if (hbi.metaEntry.regionServer == null) continue; + if (hbi.foundRegionDir == null) continue; + if (hbi.deployedOn.size() != 1) continue; + if (hbi.onlyEdits) continue; + + // We should be safe here + String tableName = hbi.metaEntry.getTableDesc().getNameAsString(); + TInfo modTInfo = tablesInfo.get(tableName); + if (modTInfo == null) { + modTInfo = new TInfo(tableName); + } + for (HServerAddress server : hbi.deployedOn) { + modTInfo.addServer(server); + } + modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey()); + tablesInfo.put(tableName, modTInfo); } - if (errorCount > 0) { - System.out.println("\nDetected " + errorCount + " inconsistencies. " + - "This might not indicate a real problem because these regions " + - "could be in the midst of a split. Consider re-running with a " + - "larger value of -timelag."); - return false; + for (TInfo tInfo : tablesInfo.values()) { + if (!tInfo.check()) { + errors.reportError("Found inconsistency in table " + tInfo.getName()); + } } - return true; // no errors } /** - * Return a list of table names whose metadata have not been modified in the - * last few milliseconds specified by timelag + * Maintain information about a particular table. 
+ */ + private class TInfo { + String tableName; + TreeMap edges; + TreeSet deployedOn; + + TInfo(String name) { + this.tableName = name; + edges = new TreeMap (Bytes.BYTES_COMPARATOR); + deployedOn = new TreeSet (); + } + + public void addEdge(byte[] fromNode, byte[] toNode) { + this.edges.put(fromNode, toNode); + } + + public void addServer(HServerAddress server) { + this.deployedOn.add(server); + } + + public String getName() { + return tableName; + } + + public int getNumRegions() { + return edges.size(); + } + + public boolean check() { + byte[] last = new byte[0]; + byte[] next = new byte[0]; + TreeSet visited = new TreeSet(Bytes.BYTES_COMPARATOR); + // Each table should start with a zero-length byte[] and end at a + // zero-length byte[]. Just follow the edges to see if this is true + while (true) { + // Check if chain is broken + if (!edges.containsKey(last)) { + errors.detail("Chain of regions in table " + tableName + + " is broken."); + return false; + } + next = edges.get(last); + // Found a cycle + if (visited.contains(next)) { + errors.detail("Chain of regions in table " + tableName + + " has a cycle."); + return false; + } + // Mark next node as visited + visited.add(next); + // If next is zero-length byte[] we are possibly at the end of the chain + if (next.length == 0) { + // If we have visited all elements we are fine + if (edges.size() != visited.size()) { + errors.detail("Chain of regions in table " + tableName + + " contains less elements than are listed in META."); + return false; + } + return true; + } + last = next; + } + // How did we get here? + } + } + + /** + * Return a list of user-space table names whose metadata have not been + * modified in the last few milliseconds specified by timelag * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER, * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last * milliseconds specified by timelag, then the table is a candidate to be returned. 
@@ -352,18 +518,17 @@ * @return tables that have not been modified recently * @throws IOException if an error is encountered */ - HTableDescriptor[] getTables(final TreeMap regionList, - AtomicInteger numSkipped) { + HTableDescriptor[] getTables(AtomicInteger numSkipped) { TreeSet uniqueTables = new TreeSet(); long now = System.currentTimeMillis(); - for (MetaEntry m: regionList.values()) { - HRegionInfo info = m; + for (HbckInfo hbi : regionInfo.values()) { + MetaEntry info = hbi.metaEntry; // if the start key is zero, then we have found the first region of a table. // pick only those tables that were not modified in the last few milliseconds. - if (info != null && info.getStartKey().length == 0) { - if (m.modTime + timelag < now) { + if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) { + if (info.modTime + timelag < now) { uniqueTables.add(info.getTableDesc()); } else { numSkipped.incrementAndGet(); // one more in-flux table @@ -374,71 +539,144 @@ } /** - * Scan META. Returns a list of all regions of all known tables. - * @param regionList - fill up all entries found in .META - * @throws IOException if an error is encountered + * Gets the entry in regionInfo corresponding to the given encoded + * region name. If the region has not been seen yet, a new entry is added + * and returned. */ - void getMetaEntries(final TreeMap regionList) throws IOException { - MetaScannerVisitor visitor = new MetaScannerVisitor() { - int countRecord = 1; + private HbckInfo getOrCreateInfo(String name) { + HbckInfo hbi = regionInfo.get(name); + if (hbi == null) { + hbi = new HbckInfo(null); + regionInfo.put(name, hbi); + } + return hbi; + } - // comparator to sort KeyValues with latest modtime - final Comparator comp = new Comparator() { - public int compare(KeyValue k1, KeyValue k2) { - return (int)(k1.getTimestamp() - k2.getTimestamp()); + /** + * Check values in regionInfo for .META. + * Check if zero or more than one regions with META are found. 
+ * If there are inconsistencies (i.e. zero or more than one region + * pretends to be holding the .META.) try to fix that and report an error. + * @throws IOException from HBaseFsckRepair functions + */ + boolean checkMetaEntries() throws IOException { + List metaRegions = Lists.newArrayList(); + for (HbckInfo value : regionInfo.values()) { + if (value.metaEntry.isMetaTable()) { + metaRegions.add(value); + } + } + + // If something is wrong + if (metaRegions.size() != 1) { + HRegionLocation rootLocation = connection.locateRegion( + HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW); + HbckInfo root = + regionInfo.get(rootLocation.getRegionInfo().getEncodedName()); + + // If there is no region holding .META. + if (metaRegions.size() == 0) { + errors.reportError(".META. is not found on any region."); + if (shouldFix()) { + errors.print("Trying to fix a problem with .META..."); + setShouldRerun(); + // try to fix it (treat it as unassigned region) + HBaseFsckRepair.fixUnassigned(conf, root.metaEntry); + } + } + // If there is more than one region pretending to hold the .META. + else if (metaRegions.size() > 1) { + errors.reportError(".META. is found on more than one region."); + if (shouldFix()) { + errors.print("Trying to fix a problem with .META..."); + setShouldRerun(); + // try to fix it (treat it as a dupe assignment) + List deployedOn = Lists.newArrayList(); + for (HbckInfo mRegion : metaRegions) { + deployedOn.add(mRegion.metaEntry.regionServer); } - }; + HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn); + } + } + // rerun hbck with hopefully fixed META + return false; + } + // no errors, so continue normally + return true; + } - public boolean processRow(Result result) throws IOException { - try { + /** + * Scan .META. and -ROOT-, adding all regions found to the regionInfo map. 
+ * @throws IOException if an error is encountered + */ + void getMetaEntries() throws IOException { + MetaScannerVisitor visitor = new MetaScannerVisitor() { + int countRecord = 1; - // record the latest modification of this META record - long ts = Collections.max(result.list(), comp).getTimestamp(); + // comparator to sort KeyValues with latest modtime + final Comparator comp = new Comparator() { + public int compare(KeyValue k1, KeyValue k2) { + return (int)(k1.getTimestamp() - k2.getTimestamp()); + } + }; - // record region details - byte[] value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.REGIONINFO_QUALIFIER); - HRegionInfo info = null; - HServerAddress server = null; - byte[] startCode = null; - if (value != null) { - info = Writables.getHRegionInfo(value); - } + public boolean processRow(Result result) throws IOException { + try { - // record assigned region server - value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.SERVER_QUALIFIER); - if (value != null && value.length > 0) { - String address = Bytes.toString(value); - server = new HServerAddress(address); - } + // record the latest modification of this META record + long ts = Collections.max(result.list(), comp).getTimestamp(); - // record region's start key - value = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.STARTCODE_QUALIFIER); - if (value != null) { - startCode = value; - } - MetaEntry m = new MetaEntry(info, server, startCode, ts); - m = regionList.put(m ,m); - if (m != null) { - throw new IOException("Two entries in META are same " + m); - } + // record region details + byte[] value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.REGIONINFO_QUALIFIER); + HRegionInfo info = null; + HServerAddress server = null; + byte[] startCode = null; + if (value != null) { + info = Writables.getHRegionInfo(value); + } - // show proof of progress to the user, once for every 100 records. 
- if (countRecord % 100 == 0) { - System.out.print("."); - } - countRecord++; - return true; - } catch (RuntimeException e) { - LOG.error("Result=" + result); - throw e; + // record assigned region server + value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.SERVER_QUALIFIER); + if (value != null && value.length > 0) { + String address = Bytes.toString(value); + server = new HServerAddress(address); } + + // record region's start key + value = result.getValue(HConstants.CATALOG_FAMILY, + HConstants.STARTCODE_QUALIFIER); + if (value != null) { + startCode = value; + } + MetaEntry m = new MetaEntry(info, server, startCode, ts); + HbckInfo hbInfo = new HbckInfo(m); + HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo); + if (previous != null) { + throw new IOException("Two entries in META are same " + previous); + } + + // show proof of progress to the user, once for every 100 records. + if (countRecord % 100 == 0) { + errors.progress(); + } + countRecord++; + return true; + } catch (RuntimeException e) { + LOG.error("Result=" + result); + throw e; } - }; - MetaScanner.metaScan(conf, visitor); - System.out.println(""); + } + }; + + // Scan -ROOT- to pick up META regions + MetaScanner.metaScan(conf, visitor, HConstants.ROOT_TABLE_NAME, + HConstants.EMPTY_START_ROW, Integer.MAX_VALUE); + + // Scan .META. to pick up user regions + MetaScanner.metaScan(conf, visitor); + errors.print(""); } /** @@ -446,27 +684,159 @@ */ private static class MetaEntry extends HRegionInfo { HServerAddress regionServer; // server hosting this region - byte[] startCode; // start value of region long modTime; // timestamp of most recent modification metadata public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer, byte[] startCode, long modTime) { super(rinfo); this.regionServer = regionServer; - this.startCode = startCode; this.modTime = modTime; } } /** - * Display the full report from fsck. 
This displays all live and dead region servers , - * and all known regions. + * Maintain information about a particular region. */ + static class HbckInfo { + boolean onlyEdits = false; + MetaEntry metaEntry = null; + FileStatus foundRegionDir = null; + List deployedOn = Lists.newArrayList(); + + HbckInfo(MetaEntry metaEntry) { + this.metaEntry = metaEntry; + } + + public String toString() { + if (metaEntry != null) { + return metaEntry.getRegionNameAsString(); + } else if (foundRegionDir != null) { + return foundRegionDir.getPath().toString(); + } else { + return "unknown region on " + Joiner.on(", ").join(deployedOn); + } + } + } + + /** + * Prints summary of all tables found on the system. + */ + private void printTableSummary() { + System.out.println("Summary:"); + for (TInfo tInfo : tablesInfo.values()) { + if (tInfo.check()) { + System.out.println("Table " + tInfo.getName() + " is okay."); + } + else { + System.out.println("Table " + tInfo.getName() + " is inconsistent."); + } + System.out.println(" -- number of regions: " + tInfo.getNumRegions()); + System.out.print(" -- deployed on:"); + for (HServerAddress server : tInfo.deployedOn) { + System.out.print(" " + server.toString()); + } + System.out.println("\n"); + } + } + + interface ErrorReporter { + public void reportError(String message); + public int summarize(); + public void detail(String details); + public void progress(); + public void print(String message); + } + + private static class PrintingErrorReporter implements ErrorReporter { + public int errorCount = 0; + private int showProgress; + + public void reportError(String message) { + if (!summary) { + System.out.println("ERROR: " + message); + } + errorCount++; + showProgress = 0; + } + + public int summarize() { + System.out.println(Integer.toString(errorCount) + + " inconsistencies detected."); + if (errorCount == 0) { + System.out.println("Status: OK"); + return 0; + } else { + System.out.println("Status: INCONSISTENT"); + return -1; + } + } +
+ public void print(String message) { + if (!summary) { + System.out.println(message); + } + } + + public void detail(String message) { + if (details) { + System.out.println(message); + } + showProgress = 0; + } + + public void progress() { + if (showProgress++ == 10) { + if (!summary) { + System.out.print("."); + } + showProgress = 0; + } + } + } + + /** + * Display the full report from fsck. + * This displays all live and dead region servers, and all known regions. + */ void displayFullReport() { details = true; } /** + * Set summary mode. + * Print only summary of the tables and status (OK or INCONSISTENT) + */ + void setSummary() { + summary = true; + } + + /** + * Mark that fsck should be run again. This is meant to be called when + * we have tried to fix something, so that the result can be re-checked. + * The command-line driver consults shouldRerun() after doWork() and runs + * doWork() at most one more time. + */ + void setShouldRerun() { + rerun = true; + } + + boolean shouldRerun() { + return rerun; + } + + /** + * Enable fix mode. When enabled, fsck should try to fix the errors (if any) + * that it finds.
+ */ + void setFixErrors() { + fix = true; + } + + boolean shouldFix() { + return fix; + } + + /** * We are interested in only those tables that have not changed their state in * META during the last few seconds specified by hbase.admin.fsck.timelag * @param seconds - the time in seconds @@ -482,6 +852,9 @@ System.err.println(" -timelag {timeInSeconds} Process only regions that " + " have not experienced any metadata updates in the last " + " {{timeInSeconds} seconds."); + System.err.println(" -fix Try to fix some of the errors."); + System.err.println(" -summary Print only summary of the tables and status."); + Runtime.getRuntime().exit(-2); } @@ -515,6 +888,10 @@ printUsageAndExit(); } i++; + } else if (cmd.equals("-fix")) { + fsck.setFixErrors(); + } else if (cmd.equals("-summary")) { + fsck.setSummary(); } else { String str = "Unknown command line option : " + cmd; LOG.info(str); @@ -524,6 +901,14 @@ } // do the real work of fsck int code = fsck.doWork(); + // If we have changed the HBase state it is better to run fsck again + // to see if we haven't broken something else in the process. + // We run it only once more because otherwise we can easily fall into + // an infinite loop. + if (fsck.shouldRerun()) { + code = fsck.doWork(); + } + Runtime.getRuntime().exit(code); } }
Index: src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java	(revision 0)
@@ -0,0 +1,163 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.ipc.HMasterInterface;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.zookeeper.ZKAssign;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.KeeperException;
+
+/**
+ * Static helpers used by HBaseFsck to repair region assignment problems.
+ */
+public class HBaseFsckRepair {
+
+  /**
+   * Repairs a region that is deployed on more than one server. Clears the
+   * region's state in the master and ZooKeeper, closes it on every given
+   * server, then handles it like an unassigned region.
+   *
+   * @param conf configuration used to connect to the cluster
+   * @param region the multiply-assigned region
+   * @param servers addresses of the servers to close the region on
+   * @throws IOException if any of the repair steps fails
+   */
+  public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
+      List<HServerAddress> servers)
+  throws IOException {
+
+    HRegionInfo actualRegion = new HRegionInfo(region);
+
+    // Clear status in master and zk
+    clearInMaster(conf, actualRegion);
+    clearInZK(conf, actualRegion);
+
+    // Close region on the servers
+    for (HServerAddress server : servers) {
+      closeRegion(conf, server, actualRegion);
+    }
+
+    // It's unassigned so fix it as such
+    fixUnassigned(conf, actualRegion);
+  }
+
+  /**
+   * Repairs an unassigned region. Clears the region's state in the master
+   * and ZooKeeper, then deletes its assignment from the catalog table.
+   *
+   * @param conf configuration used to connect to the cluster
+   * @param region the region to repair
+   * @throws IOException if any of the repair steps fails
+   */
+  public static void fixUnassigned(Configuration conf, HRegionInfo region)
+  throws IOException {
+
+    HRegionInfo actualRegion = new HRegionInfo(region);
+
+    // Clear status in master and zk
+    clearInMaster(conf, actualRegion);
+    clearInZK(conf, actualRegion);
+
+    // Clear assignment in META or ROOT
+    clearAssignment(conf, actualRegion);
+  }
+
+  /**
+   * Connects to the master and prints its protocol version. Nothing is
+   * cleared in the master yet, see the TODO in the body.
+   */
+  private static void clearInMaster(Configuration conf, HRegionInfo region)
+  throws IOException {
+    System.out.println("Region being cleared in master: " + region);
+    HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
+    long masterVersion =
+      master.getProtocolVersion("org.apache.hadoop.hbase.ipc.HMasterInterface", 25);
+    System.out.println("Master protocol version: " + masterVersion);
+    // TODO: Do we want to do it this way?
+    // Better way is to tell master to fix the issue itself?
+    // That way it can use in-memory state to determine best plan
+    // Until that is decided, master.clearFromTransition(region) is not
+    // called. If the call is enabled, let its failures propagate rather
+    // than swallowing them in an empty catch block.
+  }
+
+  /**
+   * Deletes the region's node in ZooKeeper.
+   *
+   * @throws IOException wrapping a KeeperException raised by ZooKeeper
+   */
+  private static void clearInZK(Configuration conf, HRegionInfo region)
+  throws IOException {
+    ZooKeeperWatcher zkw =
+      HConnectionManager.getConnection(conf).getZooKeeperWatcher();
+    try {
+      ZKAssign.deleteNodeFailSilent(zkw, region);
+    } catch (KeeperException e) {
+      throw new IOException("Unexpected ZK exception", e);
+    }
+  }
+
+  /**
+   * Asks the given region server to close the region, passing false for
+   * the zk argument of closeRegion.
+   */
+  private static void closeRegion(Configuration conf, HServerAddress server,
+      HRegionInfo region)
+  throws IOException {
+    HRegionInterface rs =
+      HConnectionManager.getConnection(conf).getHRegionConnection(server);
+    rs.closeRegion(region, false);
+  }
+
+  /**
+   * Deletes the server and start code columns from the region's catalog row.
+   * A meta region has its row in ROOT, any other region has it in META.
+   */
+  private static void clearAssignment(Configuration conf,
+      HRegionInfo region)
+  throws IOException {
+    HTable ht;
+    if (region.isMetaTable()) {
+      // Clear assignment in ROOT
+      ht = new HTable(conf, HConstants.ROOT_TABLE_NAME);
+    } else {
+      // Clear assignment in META
+      ht = new HTable(conf, HConstants.META_TABLE_NAME);
+    }
+    try {
+      Delete del = new Delete(region.getRegionName());
+      del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+      del.deleteColumns(HConstants.CATALOG_FAMILY,
+          HConstants.STARTCODE_QUALIFIER);
+      ht.delete(del);
+    } finally {
+      // Always release the table, even when the delete fails
+      ht.close();
+    }
+  }
+}