Index: src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java (revision 1030293)
+++ src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java (working copy)
@@ -82,6 +82,13 @@
}
@Test
+ public void testHBaseFsck() throws IOException {
+ HBaseFsck fsck = new HBaseFsck(TEST_UTIL.getConfiguration());
+ fsck.displayFullReport();
+ int result = fsck.doWork();
+ }
+
+ @Test
public void testCreateTable() throws IOException {
HTableDescriptor [] tables = admin.listTables();
int numTables = tables.length;
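
Note on the new test: testHBaseFsck() above discards the value returned by doWork(). The following minimal sketch (not part of this patch; the class name HbckRunner is hypothetical) shows how a caller is expected to interpret it, based on the HBaseFsck API introduced later in this diff: doWork() returns 0 for a consistent cluster and -1 otherwise, and shouldRerun() reports whether a repair was attempted.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class HbckRunner {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // HBaseFsck as introduced by this patch (import omitted; package as in the patch)
        HBaseFsck fsck = new HBaseFsck(conf);
        fsck.displayFullReport();        // same call the test makes
        int code = fsck.doWork();        // 0 = consistent, -1 = inconsistencies found
        if (fsck.shouldRerun()) {        // a fix was attempted; verify once more
          code = fsck.doWork();
        }
        System.exit(code);
      }
    }
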
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1030293)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -1974,16 +1974,20 @@
@Override
public boolean closeRegion(HRegionInfo region)
throws NotServingRegionException {
+ return closeRegion(region, true);
+ }
+
+ @Override
+ public boolean closeRegion(HRegionInfo region, final boolean zk)
+ throws NotServingRegionException {
LOG.info("Received close region: " + region.getRegionNameAsString());
- // TODO: Need to check if this is being served here but currently undergoing
- // a split (so master needs to retry close after split is complete)
if (!onlineRegions.containsKey(region.getEncodedName())) {
LOG.warn("Received close for region we are not serving; " +
region.getEncodedName());
throw new NotServingRegionException("Received close for "
+ region.getRegionNameAsString() + " but we are not serving it");
}
- return closeRegion(region, false, true);
+ return closeRegion(region, false, zk);
}
/**
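
The two-argument closeRegion(region, zk) above lets a caller ask a region server to close a region without the usual ZooKeeper state transition; this is how the new HBaseFsckRepair class at the end of this patch forces a close while repairing a duplicate assignment. A minimal sketch (not part of the patch; the class and method names are hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.HServerAddress;
    import org.apache.hadoop.hbase.client.HConnectionManager;
    import org.apache.hadoop.hbase.ipc.HRegionInterface;

    public class ForceClose {
      // Close a region on one specific server; zk == false means the region
      // server skips the ZooKeeper transition, so hbck can manage the
      // reassignment itself.
      static void forceClose(Configuration conf, HServerAddress server,
          HRegionInfo region) throws IOException {
        HRegionInterface rs =
            HConnectionManager.getConnection(conf).getHRegionConnection(server);
        rs.closeRegion(region, false);
      }
    }
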
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1030293)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy)
@@ -1326,6 +1326,34 @@
}
/**
+ * Clears the specified region from being in transition.
+ *
+ * Used only by HBCK tool.
+ * @param hri
+ */
+ public void clearRegionFromTransition(HRegionInfo hri) {
+ synchronized (this.regionsInTransition) {
+ this.regionsInTransition.remove(hri.getEncodedName());
+ }
+ synchronized (this.regions) {
+ this.regions.remove(hri);
+ }
+ synchronized (this.regionPlans) {
+ this.regionPlans.remove(hri.getEncodedName());
+ }
+ synchronized (this.servers) {
+ for (List<HRegionInfo> regions : this.servers.values()) {
+ for (int i = 0; i < regions.size(); i++) {
+ if (regions.get(i).equals(hri)) {
+ regions.remove(i);
+ break;
+ }
+ }
+ }
+ }
+ }
+
- private TreeMap<HRegionInfo, MetaEntry> metaEntries;
- private boolean details = false; // do we display the full report?
+ private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
+ private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
+ ErrorReporter errors = new PrintingErrorReporter();
+
+ private static boolean details = false; // do we display the full report
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
+ private boolean fix = false; // do we want to try fixing the errors?
+ private boolean rerun = false; // if we tried to fix something rerun hbck
+ private static boolean summary = false; // if we want to print less output
/**
* Constructor
@@ -79,19 +87,11 @@
*/
public HBaseFsck(Configuration conf)
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
- super(conf);
this.conf = conf;
- // setup filesystem properties
- this.rootDir = new Path(conf.get(HConstants.HBASE_DIR));
- this.fs = rootDir.getFileSystem(conf);
-
-
- // fetch information from master
- master = getMaster();
- status = master.getClusterStatus();
- connection = getConnection();
- this.metaEntries = new TreeMap<HRegionInfo, MetaEntry>();
+ HBaseAdmin admin = new HBaseAdmin(conf);
+ status = admin.getMaster().getClusterStatus();
+ connection = admin.getConnection();
}
/**
@@ -101,23 +101,39 @@
*/
int doWork() throws IOException {
// print hbase server version
- System.out.println("Version: " + status.getHBaseVersion());
+ errors.print("Version: " + status.getHBaseVersion());
+ // Make sure regionInfo is empty before starting
+ regionInfo.clear();
+ tablesInfo.clear();
+
// get a list of all regions from the master. This involves
// scanning the META table
- getMetaEntries(metaEntries);
+ if (!recordRootRegion()) {
+ // Will remove later if we can fix it
+ errors.reportError("Encountered fatal error. Exitting...");
+ return -1;
+ }
+ getMetaEntries();
+ // Check if .META. is found only once and in the right place
+ if (!checkMetaEntries()) {
+ // Will remove later if we can fix it
+ errors.reportError("Encountered fatal error. Exitting...");
+ return -1;
+ }
+
// get a list of all tables that have not changed recently.
AtomicInteger numSkipped = new AtomicInteger(0);
- HTableDescriptor[] allTables = getTables(metaEntries, numSkipped);
- System.out.println("Number of Tables: " + allTables.length);
+ HTableDescriptor[] allTables = getTables(numSkipped);
+ errors.print("Number of Tables: " + allTables.length);
if (details) {
if (numSkipped.get() > 0) {
- System.out.println("\n Number of Tables in flux: " + numSkipped.get());
+ errors.detail("\n Number of Tables in flux: " + numSkipped.get());
}
for (HTableDescriptor td : allTables) {
String tableName = td.getNameAsString();
- System.out.println("\t Table: " + tableName + "\t" +
+ errors.detail("\t Table: " + tableName + "\t" +
(td.isReadOnly() ? "ro" : "rw") + "\t" +
(td.isRootRegion() ? "ROOT" :
(td.isMetaRegion() ? "META" : " ")) + "\t" +
@@ -127,138 +143,135 @@
// From the master, get a list of all known live region servers
Collection<HServerInfo> regionServers = status.getServerInfo();
- System.out.println("Number of live region servers:" +
+ errors.print("Number of live region servers:" +
regionServers.size());
if (details) {
for (HServerInfo rsinfo: regionServers) {
- System.out.println("\t RegionServer:" + rsinfo.getServerName());
+ errors.print("\t RegionServer:" + rsinfo.getServerName());
}
}
// From the master, get a list of all dead region servers
Collection<String> deadRegionServers = status.getDeadServerNames();
- System.out.println("Number of dead region servers:" +
+ errors.print("Number of dead region servers:" +
deadRegionServers.size());
if (details) {
for (String name: deadRegionServers) {
- System.out.println("\t RegionServer(dead):" + name);
+ errors.print("\t RegionServer(dead):" + name);
}
}
- // process information from all region servers
- boolean status1 = processRegionServers(regionServers);
+ // Determine what's deployed
+ processRegionServers(regionServers);
- // match HDFS with META
- boolean status2 = checkHdfs();
+ // Determine what's on HDFS
+ checkHdfs();
- if (status1 == true && status2 == true) {
- System.out.println("\nRest easy, buddy! HBase is clean. ");
- return 0;
- } else {
- System.out.println("\nInconsistencies detected.");
- return -1;
- }
+ // Check consistency
+ checkConsistency();
+
+ // Check integrity
+ checkIntegrity();
+
+ // Print table summary
+ printTableSummary();
+
+ return errors.summarize();
}
/**
- * Checks HDFS and META
- * @return true if there were no errors, otherwise return false
+ * Scan HDFS for all regions, recording their information into
+ * regionInfo
*/
- boolean checkHdfs() throws IOException {
+ void checkHdfs() throws IOException {
+ Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
+ FileSystem fs = rootDir.getFileSystem(conf);
- boolean status = true; // success
+ // list all tables from HDFS
+ List<FileStatus> tableDirs = Lists.newArrayList();
- // make a copy of all tables in META
- TreeMap<String, MetaEntry> regions = new TreeMap<String, MetaEntry>();
- for (MetaEntry meta: metaEntries.values()) {
- regions.put(meta.getTableDesc().getNameAsString(), meta);
- }
-
- // list all tables from HDFS
- TreeMap<Path, FileStatus> allTableDirs = new TreeMap<Path, FileStatus>();
+ boolean foundVersionFile = false;
FileStatus[] files = fs.listStatus(rootDir);
- for (int i = 0; files != null && i < files.length; i++) {
- allTableDirs.put(files[i].getPath(), files[i]);
+ for (FileStatus file : files) {
+ if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) {
+ foundVersionFile = true;
+ } else {
+ tableDirs.add(file);
+ }
}
- // verify that -ROOT-, .META directories exists.
- Path rdir = new Path(rootDir, Bytes.toString(HConstants.ROOT_TABLE_NAME));
- FileStatus ignore = allTableDirs.remove(rdir);
- if (ignore == null) {
- status = false;
- System.out.print("\nERROR: Path " + rdir + " for ROOT table does not exist.");
- }
- Path mdir = new Path(rootDir, Bytes.toString(HConstants.META_TABLE_NAME));
- ignore = allTableDirs.remove(mdir);
- if (ignore == null) {
- status = false;
- System.out.print("\nERROR: Path " + mdir + " for META table does not exist.");
- }
-
// verify that version file exists
- Path vfile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
- ignore = allTableDirs.remove(vfile);
- if (ignore == null) {
- status = false;
- System.out.print("\nERROR: Version file " + vfile + " does not exist.");
+ if (!foundVersionFile) {
+ errors.reportError("Version file does not exist in root dir " + rootDir);
}
- // filter out all valid regions found in the META
- for (HRegionInfo rinfo: metaEntries.values()) {
- Path tableDir = HTableDescriptor.getTableDir(rootDir,
- rinfo.getTableDesc().getName());
- // Path regionDir = HRegion.getRegionDir(tableDir, rinfo.getEncodedName());
- // if the entry exists in allTableDirs, then remove it from allTableDirs as well
- // as from the META tmp list
- FileStatus found = allTableDirs.remove(tableDir);
- if (found != null) {
- regions.remove(tableDir.getName());
- }
- }
+ // level 1: <HBASE_DIR>/*
+ for (FileStatus tableDir : tableDirs) {
+ String tableName = tableDir.getPath().getName();
+ // ignore hidden files
+ if (tableName.startsWith(".") &&
+ !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME)))
+ continue;
+ // level 2: <HBASE_DIR>/<table>/*
+ FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
+ for (FileStatus regionDir : regionDirs) {
+ String encodedName = regionDir.getPath().getName();
+ // ignore directories that aren't hexadecimal
+ if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue;
- // The remaining entries in allTableDirs do not have entries in .META
- // However, if the path name was modified in the last few milliseconds
- // as specified by timelag, then do not flag it as an inconsistency.
- long now = System.currentTimeMillis();
- for (FileStatus region: allTableDirs.values()) {
- if (region.getModificationTime() + timelag < now) {
- String finalComponent = region.getPath().getName();
- if (!finalComponent.startsWith(".")) {
- // ignore .logs and .oldlogs directories
- System.out.print("\nERROR: Path " + region.getPath() +
- " does not have a corresponding entry in META.");
- status = false;
+ HbckInfo hbi = getOrCreateInfo(encodedName);
+ hbi.foundRegionDir = regionDir;
+
+ // Set a flag if this region contains only edits
+ // This is special case if a region is left after split
+ hbi.onlyEdits = true;
+ FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
+ Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
+ for (FileStatus subDir : subDirs) {
+ String sdName = subDir.getPath().getName();
+ if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
+ hbi.onlyEdits = false;
+ break;
+ }
}
}
}
+ }
- // the remaining entries in tmp do not have entries in HDFS
- for (HRegionInfo rinfo: regions.values()) {
- System.out.println("\nERROR: Region " + rinfo.getRegionNameAsString() +
- " does not have a corresponding entry in HDFS.");
- status = false;
+ /**
+ * Record the location of the ROOT region as found in ZooKeeper,
+ * as if it were in a META table. This is so that we can check
+ * deployment of ROOT.
+ */
+ boolean recordRootRegion() throws IOException {
+ HRegionLocation rootLocation = connection.locateRegion(
+ HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
+
+ // Check if Root region is valid and existing
+ if (rootLocation == null || rootLocation.getRegionInfo() == null ||
+ rootLocation.getServerAddress() == null) {
+ errors.reportError("Root Region or some of its attributes is null.");
+ return false;
}
- return status;
+
+ MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(),
+ rootLocation.getServerAddress(), null, System.currentTimeMillis());
+ HbckInfo hbInfo = new HbckInfo(m);
+ regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
+ return true;
}
-
+
/**
* Contacts each regionserver and fetches metadata about regions.
* @param regionServerList - the list of region servers to connect to
* @throws IOException if a remote or network exception occurs
- * @return true if there were no errors, otherwise return false
*/
- boolean processRegionServers(Collection<HServerInfo> regionServerList)
+ void processRegionServers(Collection<HServerInfo> regionServerList)
throws IOException {
- // make a copy of all entries in META
- TreeMap<HRegionInfo, MetaEntry> tmp =
- new TreeMap<HRegionInfo, MetaEntry>(metaEntries);
- long errorCount = 0; // number of inconsistencies detected
- int showProgress = 0;
-
// loop to contact each region server
for (HServerInfo rsinfo: regionServerList) {
- showProgress++; // one more server.
+ errors.progress();
try {
HRegionInterface server = connection.getHRegionConnection(
rsinfo.getServerAddress());
@@ -266,85 +279,238 @@
// list all online regions from this region server
NavigableSet<HRegionInfo> regions = server.getOnlineRegions();
if (details) {
- System.out.print("\nRegionServer:" + rsinfo.getServerName() +
+ errors.detail("\nRegionServer:" + rsinfo.getServerName() +
" number of regions:" + regions.size());
for (HRegionInfo rinfo: regions) {
- System.out.print("\n\t name:" + rinfo.getRegionNameAsString() +
+ errors.detail("\n\t name:" + rinfo.getRegionNameAsString() +
" id:" + rinfo.getRegionId() +
" encoded name:" + rinfo.getEncodedName() +
" start :" + Bytes.toStringBinary(rinfo.getStartKey()) +
" end :" + Bytes.toStringBinary(rinfo.getEndKey()));
}
- showProgress = 0;
}
// check to see if the existence of this region matches the region in META
- for (HRegionInfo r: regions) {
- MetaEntry metaEntry = metaEntries.get(r);
-
- // this entry exists in the region server but is not in the META
- if (metaEntry == null) {
- if (r.isMetaRegion()) {
- continue; // this is ROOT or META region
- }
- System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
- " found on server " + rsinfo.getServerAddress() +
- " but is not listed in META.");
- errorCount++;
- showProgress = 0;
- continue;
- }
- if (!metaEntry.regionServer.equals(rsinfo.getServerAddress())) {
- System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
- " found on server " + rsinfo.getServerAddress() +
- " but is listed in META to be on server " +
- metaEntry.regionServer);
- errorCount++;
- showProgress = 0;
- }
-
- // The region server is indeed serving a valid region. Remove it from tmp
- tmp.remove(r);
+ for (HRegionInfo r:regions) {
+ HbckInfo hbi = getOrCreateInfo(r.getEncodedName());
+ hbi.deployedOn.add(rsinfo.getServerAddress());
}
} catch (IOException e) { // unable to connect to the region server.
- if (details) {
- System.out.print("\nRegionServer:" + rsinfo.getServerName() +
+ errors.reportError("\nRegionServer:" + rsinfo.getServerName() +
" Unable to fetch region information. " + e);
- }
}
- if (showProgress % 10 == 0) {
- System.out.print("."); // show progress to user
- showProgress = 0;
+ }
+ }
+
+ /**
+ * Check consistency of all regions that have been found in previous phases.
+ */
+ void checkConsistency() throws IOException {
+ for (HbckInfo hbi : regionInfo.values()) {
+ doConsistencyCheck(hbi);
+ }
+ }
+
+ /**
+ * Check a single region for consistency and correct deployment.
+ */
+ void doConsistencyCheck(HbckInfo hbi) throws IOException {
+ String descriptiveName = hbi.toString();
+
+ boolean inMeta = hbi.metaEntry != null;
+ boolean inHdfs = hbi.foundRegionDir != null;
+ boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
+ boolean isDeployed = !hbi.deployedOn.isEmpty();
+ boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
+ boolean deploymentMatchesMeta =
+ hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
+ hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
+ boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline();
+ boolean recentlyModified = hbi.foundRegionDir != null &&
+ hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
+
+ // ========== First the healthy cases =============
+ if (hbi.onlyEdits) {
+ return;
+ }
+ if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
+ return;
+ } else if (inMeta && !shouldBeDeployed && !isDeployed) {
+ // offline regions shouldn't cause complaints
+ LOG.debug("Region " + descriptiveName + " offline, ignoring.");
+ return;
+ } else if (recentlyModified) {
+ LOG.info("Region " + descriptiveName + " was recently modified -- skipping");
+ return;
+ }
+ // ========== Cases where the region is not in META =============
+ else if (!inMeta && !inHdfs && !isDeployed) {
+ // We shouldn't have record of this region at all then!
+ assert false : "Entry for region with no data";
+ } else if (!inMeta && !inHdfs && isDeployed) {
+ errors.reportError("Region " + descriptiveName + " not on HDFS or in META but " +
+ "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+ } else if (!inMeta && inHdfs && !isDeployed) {
+ errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " +
+ "or deployed on any region server.");
+ } else if (!inMeta && inHdfs && isDeployed) {
+ errors.reportError("Region " + descriptiveName + " not in META, but deployed on " +
+ Joiner.on(", ").join(hbi.deployedOn));
+
+ // ========== Cases where the region is in META =============
+ } else if (inMeta && !inHdfs && !isDeployed) {
+ errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " +
+ "or deployed on any region server.");
+ } else if (inMeta && !inHdfs && isDeployed) {
+ errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " +
+ "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+ } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
+ errors.reportError("Region " + descriptiveName + " not deployed on any region server.");
+ // If we are trying to fix the errors
+ if (shouldFix()) {
+ errors.print("Trying to fix unassigned region...");
+ setShouldRerun();
+ HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
}
+ } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
+ errors.reportError("Region " + descriptiveName + " has should not be deployed according " +
+ "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+ } else if (inMeta && inHdfs && isMultiplyDeployed) {
+ errors.reportError("Region " + descriptiveName + " is listed in META on region server " +
+ hbi.metaEntry.regionServer + " but is multiply assigned to region servers " +
+ Joiner.on(", ").join(hbi.deployedOn));
+ // If we are trying to fix the errors
+ if (shouldFix()) {
+ errors.print("Trying to fix assignment error...");
+ setShouldRerun();
+ HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
+ }
+ } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
+ errors.reportError("Region " + descriptiveName + " listed in META on region server " +
+ hbi.metaEntry.regionServer + " but found on region server " +
+ hbi.deployedOn.get(0));
+ // If we are trying to fix the errors
+ if (shouldFix()) {
+ errors.print("Trying to fix assignment error...");
+ setShouldRerun();
+ HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
+ }
+ } else {
+ errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" +
+ " inMeta=" + inMeta +
+ " inHdfs=" + inHdfs +
+ " isDeployed=" + isDeployed +
+ " isMultiplyDeployed=" + isMultiplyDeployed +
+ " deploymentMatchesMeta=" + deploymentMatchesMeta +
+ " shouldBeDeployed=" + shouldBeDeployed);
}
+ }
- // all the region left in tmp are not found on any region server
- for (MetaEntry metaEntry: tmp.values()) {
- // An offlined region will not be present out on a regionserver. A region
- // is offlined if table is offlined -- will still have an entry in .META.
- // of a region is offlined because its a parent region and its daughters
- // still have references.
- if (metaEntry.isOffline()) continue;
- System.out.print("\nERROR: Region " + metaEntry.getRegionNameAsString() +
- " is not served by any region server " +
- " but is listed in META to be on server " +
- metaEntry.regionServer);
- errorCount++;
+ /**
+ * Checks tables integrity. Goes over all regions and scans the tables.
+ * Collects all the pieces for each table and checks if there are missing,
+ * repeated or overlapping ones.
+ */
+ void checkIntegrity() {
+ for (HbckInfo hbi : regionInfo.values()) {
+ // Check only valid, working regions
+ if (hbi.metaEntry == null) continue;
+ if (hbi.metaEntry.regionServer == null) continue;
+ if (hbi.foundRegionDir == null) continue;
+ if (hbi.deployedOn.size() != 1) continue;
+ if (hbi.onlyEdits) continue;
+
+ // We should be safe here
+ String tableName = hbi.metaEntry.getTableDesc().getNameAsString();
+ TInfo modTInfo = tablesInfo.get(tableName);
+ if (modTInfo == null) {
+ modTInfo = new TInfo(tableName);
+ }
+ for (HServerAddress server : hbi.deployedOn) {
+ modTInfo.addServer(server);
+ }
+ modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
+ tablesInfo.put(tableName, modTInfo);
}
- if (errorCount > 0) {
- System.out.println("\nDetected " + errorCount + " inconsistencies. " +
- "This might not indicate a real problem because these regions " +
- "could be in the midst of a split. Consider re-running with a " +
- "larger value of -timelag.");
- return false;
+ for (TInfo tInfo : tablesInfo.values()) {
+ if (!tInfo.check()) {
+ errors.reportError("Found inconsistency in table " + tInfo.getName());
+ }
}
- return true; // no errors
}
/**
- * Return a list of table names whose metadata have not been modified in the
- * last few milliseconds specified by timelag
+ * Maintain information about a particular table.
+ */
+ private class TInfo {
+ String tableName;
+ TreeMap<byte[], byte[]> edges;
+ TreeSet<HServerAddress> deployedOn;
+
+ TInfo(String name) {
+ this.tableName = name;
+ edges = new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
+ deployedOn = new TreeSet<HServerAddress>();
+ }
+
+ public void addEdge(byte[] fromNode, byte[] toNode) {
+ this.edges.put(fromNode, toNode);
+ }
+
+ public void addServer(HServerAddress server) {
+ this.deployedOn.add(server);
+ }
+
+ public String getName() {
+ return tableName;
+ }
+
+ public int getNumRegions() {
+ return edges.size();
+ }
+
+ public boolean check() {
+ byte[] last = new byte[0];
+ byte[] next = new byte[0];
+ TreeSet<byte[]> visited = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+ // Each table should start with a zero-length byte[] and end at a
+ // zero-length byte[]. Just follow the edges to see if this is true
+ while (true) {
+ // Check if chain is broken
+ if (!edges.containsKey(last)) {
+ errors.detail("Chain of regions in table " + tableName +
+ " is broken.");
+ return false;
+ }
+ next = edges.get(last);
+ // Found a cycle
+ if (visited.contains(next)) {
+ errors.detail("Chain of regions in table " + tableName +
+ " has a cycle.");
+ return false;
+ }
+ // Mark next node as visited
+ visited.add(next);
+ // If next is zero-length byte[] we are possibly at the end of the chain
+ if (next.length == 0) {
+ // If we have visited all elements we are fine
+ if (edges.size() != visited.size()) {
+ errors.detail("Chain of regions in table " + tableName +
+ " contains less elements than are listed in META.");
+ return false;
+ }
+ return true;
+ }
+ last = next;
+ }
+ // How did we get here?
+ }
+ }
+
+ /**
+ * Return a list of user-space table names whose metadata have not been
+ * modified in the last few milliseconds specified by timelag
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
* milliseconds specified by timelag, then the table is a candidate to be returned.
@@ -352,18 +518,17 @@
* @return tables that have not been modified recently
* @throws IOException if an error is encountered
*/
- HTableDescriptor[] getTables(final TreeMap<HRegionInfo, MetaEntry> regionList,
- AtomicInteger numSkipped) {
+ HTableDescriptor[] getTables(AtomicInteger numSkipped) {
TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
long now = System.currentTimeMillis();
- for (MetaEntry m: regionList.values()) {
- HRegionInfo info = m;
+ for (HbckInfo hbi : regionInfo.values()) {
+ MetaEntry info = hbi.metaEntry;
// if the start key is zero, then we have found the first region of a table.
// pick only those tables that were not modified in the last few milliseconds.
- if (info != null && info.getStartKey().length == 0) {
- if (m.modTime + timelag < now) {
+ if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
+ if (info.modTime + timelag < now) {
uniqueTables.add(info.getTableDesc());
} else {
numSkipped.incrementAndGet(); // one more in-flux table
@@ -374,71 +539,144 @@
}
/**
- * Scan META. Returns a list of all regions of all known tables.
- * @param regionList - fill up all entries found in .META
- * @throws IOException if an error is encountered
+ * Gets the entry in regionInfo corresponding to the given encoded
+ * region name. If the region has not been seen yet, a new entry is added
+ * and returned.
*/
- void getMetaEntries(final TreeMap<HRegionInfo, MetaEntry> regionList) throws IOException {
- MetaScannerVisitor visitor = new MetaScannerVisitor() {
- int countRecord = 1;
+ private HbckInfo getOrCreateInfo(String name) {
+ HbckInfo hbi = regionInfo.get(name);
+ if (hbi == null) {
+ hbi = new HbckInfo(null);
+ regionInfo.put(name, hbi);
+ }
+ return hbi;
+ }
- // comparator to sort KeyValues with latest modtime
- final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
- public int compare(KeyValue k1, KeyValue k2) {
- return (int)(k1.getTimestamp() - k2.getTimestamp());
+ /**
+ * Check values in regionInfo for .META.
+ * Check if zero or more than one regions with META are found.
+ * If there are inconsistencies (i.e. zero or more than one regions
+ * pretend to be holding the .META.) try to fix that and report an error.
+ * @throws IOException from HBaseFsckRepair functions
+ */
+ boolean checkMetaEntries() throws IOException {
+ List<HbckInfo> metaRegions = Lists.newArrayList();
+ for (HbckInfo value : regionInfo.values()) {
+ if (value.metaEntry.isMetaTable()) {
+ metaRegions.add(value);
+ }
+ }
+
+ // If something is wrong
+ if (metaRegions.size() != 1) {
+ HRegionLocation rootLocation = connection.locateRegion(
+ HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
+ HbckInfo root =
+ regionInfo.get(rootLocation.getRegionInfo().getEncodedName());
+
+ // If there is no region holding .META.
+ if (metaRegions.size() == 0) {
+ errors.reportError(".META. is not found on any region.");
+ if (shouldFix()) {
+ errors.print("Trying to fix a problem with .META...");
+ setShouldRerun();
+ // try to fix it (treat it as unassigned region)
+ HBaseFsckRepair.fixUnassigned(conf, root.metaEntry);
+ }
+ }
+ // If there are more than one regions pretending to hold the .META.
+ else if (metaRegions.size() > 1) {
+ errors.reportError(".META. is found on more than one region.");
+ if (shouldFix()) {
+ errors.print("Trying to fix a problem with .META...");
+ setShouldRerun();
+ // try to fix it (treat it as a dupe assignment)
+ List<HServerAddress> deployedOn = Lists.newArrayList();
+ for (HbckInfo mRegion : metaRegions) {
+ deployedOn.add(mRegion.metaEntry.regionServer);
}
- };
+ HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn);
+ }
+ }
+ // rerun hbck with hopefully fixed META
+ return false;
+ }
+ // no errors, so continue normally
+ return true;
+ }
- public boolean processRow(Result result) throws IOException {
- try {
+ /**
+ * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
+ * @throws IOException if an error is encountered
+ */
+ void getMetaEntries() throws IOException {
+ MetaScannerVisitor visitor = new MetaScannerVisitor() {
+ int countRecord = 1;
- // record the latest modification of this META record
- long ts = Collections.max(result.list(), comp).getTimestamp();
+ // comparator to sort KeyValues with latest modtime
+ final Comparator<KeyValue> comp = new Comparator<KeyValue>() {
+ public int compare(KeyValue k1, KeyValue k2) {
+ return (int)(k1.getTimestamp() - k2.getTimestamp());
+ }
+ };
- // record region details
- byte[] value = result.getValue(HConstants.CATALOG_FAMILY,
- HConstants.REGIONINFO_QUALIFIER);
- HRegionInfo info = null;
- HServerAddress server = null;
- byte[] startCode = null;
- if (value != null) {
- info = Writables.getHRegionInfo(value);
- }
+ public boolean processRow(Result result) throws IOException {
+ try {
- // record assigned region server
- value = result.getValue(HConstants.CATALOG_FAMILY,
- HConstants.SERVER_QUALIFIER);
- if (value != null && value.length > 0) {
- String address = Bytes.toString(value);
- server = new HServerAddress(address);
- }
+ // record the latest modification of this META record
+ long ts = Collections.max(result.list(), comp).getTimestamp();
- // record region's start key
- value = result.getValue(HConstants.CATALOG_FAMILY,
- HConstants.STARTCODE_QUALIFIER);
- if (value != null) {
- startCode = value;
- }
- MetaEntry m = new MetaEntry(info, server, startCode, ts);
- m = regionList.put(m ,m);
- if (m != null) {
- throw new IOException("Two entries in META are same " + m);
- }
+ // record region details
+ byte[] value = result.getValue(HConstants.CATALOG_FAMILY,
+ HConstants.REGIONINFO_QUALIFIER);
+ HRegionInfo info = null;
+ HServerAddress server = null;
+ byte[] startCode = null;
+ if (value != null) {
+ info = Writables.getHRegionInfo(value);
+ }
- // show proof of progress to the user, once for every 100 records.
- if (countRecord % 100 == 0) {
- System.out.print(".");
- }
- countRecord++;
- return true;
- } catch (RuntimeException e) {
- LOG.error("Result=" + result);
- throw e;
+ // record assigned region server
+ value = result.getValue(HConstants.CATALOG_FAMILY,
+ HConstants.SERVER_QUALIFIER);
+ if (value != null && value.length > 0) {
+ String address = Bytes.toString(value);
+ server = new HServerAddress(address);
}
+
+ // record region's start key
+ value = result.getValue(HConstants.CATALOG_FAMILY,
+ HConstants.STARTCODE_QUALIFIER);
+ if (value != null) {
+ startCode = value;
+ }
+ MetaEntry m = new MetaEntry(info, server, startCode, ts);
+ HbckInfo hbInfo = new HbckInfo(m);
+ HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo);
+ if (previous != null) {
+ throw new IOException("Two entries in META are same " + previous);
+ }
+
+ // show proof of progress to the user, once for every 100 records.
+ if (countRecord % 100 == 0) {
+ errors.progress();
+ }
+ countRecord++;
+ return true;
+ } catch (RuntimeException e) {
+ LOG.error("Result=" + result);
+ throw e;
}
- };
- MetaScanner.metaScan(conf, visitor);
- System.out.println("");
+ }
+ };
+
+ // Scan -ROOT- to pick up META regions
+ MetaScanner.metaScan(conf, visitor, HConstants.ROOT_TABLE_NAME,
+ HConstants.EMPTY_START_ROW, Integer.MAX_VALUE);
+
+ // Scan .META. to pick up user regions
+ MetaScanner.metaScan(conf, visitor);
+ errors.print("");
}
/**
@@ -446,27 +684,159 @@
*/
private static class MetaEntry extends HRegionInfo {
HServerAddress regionServer; // server hosting this region
- byte[] startCode; // start value of region
long modTime; // timestamp of most recent modification metadata
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
byte[] startCode, long modTime) {
super(rinfo);
this.regionServer = regionServer;
- this.startCode = startCode;
this.modTime = modTime;
}
}
/**
- * Display the full report from fsck. This displays all live and dead region servers ,
- * and all known regions.
+ * Maintain information about a particular region.
*/
+ static class HbckInfo {
+ boolean onlyEdits = false;
+ MetaEntry metaEntry = null;
+ FileStatus foundRegionDir = null;
+ List<HServerAddress> deployedOn = Lists.newArrayList();
+
+ HbckInfo(MetaEntry metaEntry) {
+ this.metaEntry = metaEntry;
+ }
+
+ public String toString() {
+ if (metaEntry != null) {
+ return metaEntry.getRegionNameAsString();
+ } else if (foundRegionDir != null) {
+ return foundRegionDir.getPath().toString();
+ } else {
+ return "unknown region on " + Joiner.on(", ").join(deployedOn);
+ }
+ }
+ }
+
+ /**
+ * Prints summary of all tables found on the system.
+ */
+ private void printTableSummary() {
+ System.out.println("Summary:");
+ for (TInfo tInfo : tablesInfo.values()) {
+ if (tInfo.check()) {
+ System.out.println("Table " + tInfo.getName() + " is okay.");
+ }
+ else {
+ System.out.println("Table " + tInfo.getName() + " is inconsistent.");
+ }
+ System.out.println(" -- number of regions: " + tInfo.getNumRegions());
+ System.out.print(" -- deployed on:");
+ for (HServerAddress server : tInfo.deployedOn) {
+ System.out.print(" " + server.toString());
+ }
+ System.out.println("\n");
+ }
+ }
+
+ interface ErrorReporter {
+ public void reportError(String message);
+ public int summarize();
+ public void detail(String details);
+ public void progress();
+ public void print(String message);
+ }
+
+ private static class PrintingErrorReporter implements ErrorReporter {
+ public int errorCount = 0;
+ private int showProgress;
+
+ public void reportError(String message) {
+ if (!summary) {
+ System.out.println("ERROR: " + message);
+ }
+ errorCount++;
+ showProgress = 0;
+ }
+
+ public int summarize() {
+ System.out.println(Integer.toString(errorCount) +
+ " inconsistencies detected.");
+ if (errorCount == 0) {
+ System.out.println("Status: OK");
+ return 0;
+ } else {
+ System.out.println("Status: INCONSISTENT");
+ return -1;
+ }
+ }
+
+ public void print(String message) {
+ if (!summary) {
+ System.out.println(message);
+ }
+ }
+
+ public void detail(String message) {
+ if (details) {
+ System.out.println(message);
+ }
+ showProgress = 0;
+ }
+
+ public void progress() {
+ if (showProgress++ == 10) {
+ if (!summary) {
+ System.out.print(".");
+ }
+ showProgress = 0;
+ }
+ }
+ }
+
+ /**
+ * Display the full report from fsck.
+ * This displays all live and dead region servers, and all known regions.
+ */
void displayFullReport() {
details = true;
}
/**
+ * Set summary mode.
+ * Print only summary of the tables and status (OK or INCONSISTENT)
+ */
+ void setSummary() {
+ summary = true;
+ }
+
+ /**
+ * Mark that a repair was attempted, so that the caller knows the fsck
+ * tool should be rerun to verify that the fix worked.
+ */
+ void setShouldRerun() {
+ rerun = true;
+ }
+
+ boolean shouldRerun() {
+ return rerun;
+ }
+
+ /**
+ * Fix inconsistencies found by fsck. This should try to fix errors (if any)
+ * found by fsck utility.
+ */
+ void setFixErrors() {
+ fix = true;
+ }
+
+ boolean shouldFix() {
+ return fix;
+ }
+
+ /**
* We are interested in only those tables that have not changed their state in
* META during the last few seconds specified by hbase.admin.fsck.timelag
* @param seconds - the time in seconds
@@ -482,6 +852,9 @@
System.err.println(" -timelag {timeInSeconds} Process only regions that " +
" have not experienced any metadata updates in the last " +
" {{timeInSeconds} seconds.");
+ System.err.println(" -fix Try to fix some of the errors.");
+ System.err.println(" -summary Print only summary of the tables and status.");
+
Runtime.getRuntime().exit(-2);
}
@@ -515,6 +888,10 @@
printUsageAndExit();
}
i++;
+ } else if (cmd.equals("-fix")) {
+ fsck.setFixErrors();
+ } else if (cmd.equals("-summary")) {
+ fsck.setSummary();
} else {
String str = "Unknown command line option : " + cmd;
LOG.info(str);
@@ -524,6 +901,14 @@
}
// do the real work of fsck
int code = fsck.doWork();
+ // If we have changed the HBase state it is better to run fsck again
+ // to see if we haven't broken something else in the process.
+ // We run it only once more because otherwise we can easily fall into
+ // an infinite loop.
+ if (fsck.shouldRerun()) {
+ code = fsck.doWork();
+ }
+
Runtime.getRuntime().exit(code);
}
}
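
For reference, a small standalone illustration (not part of the patch; class name ChainExample is hypothetical) of the region-chain walk that TInfo.check() performs above: each region contributes a startKey -> endKey edge, and a table is consistent when the walk from the empty start key returns to the empty end key after visiting every edge.

    import java.util.TreeMap;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ChainExample {
      public static void main(String[] args) {
        // Healthy table with regions [,b), [b,c), [c,): start key -> end key.
        TreeMap<byte[], byte[]> edges =
            new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
        edges.put(new byte[0], Bytes.toBytes("b"));
        edges.put(Bytes.toBytes("b"), Bytes.toBytes("c"));
        edges.put(Bytes.toBytes("c"), new byte[0]);
        // check() walks "" -> "b" -> "c" -> "" and visits all three entries,
        // so the chain is complete. Dropping the [b,c) entry leaves no key "b"
        // and the walk reports a broken chain; an end key pointing back to an
        // already-visited key is reported as a cycle.
      }
    }
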
Index: src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java (revision 0)
+++ src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java (revision 0)
@@ -0,0 +1,121 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.ipc.HMasterInterface;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.zookeeper.ZKAssign;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.KeeperException;
+
+public class HBaseFsckRepair {
+
+ public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
+ List<HServerAddress> servers)
+ throws IOException {
+
+ HRegionInfo actualRegion = new HRegionInfo(region);
+
+ // Clear status in master and zk
+ clearInMaster(conf, actualRegion);
+ clearInZK(conf, actualRegion);
+
+ // Close region on the servers
+ for(HServerAddress server : servers) {
+ closeRegion(conf, server, actualRegion);
+ }
+
+ // It's unassigned so fix it as such
+ fixUnassigned(conf, actualRegion);
+ }
+
+ public static void fixUnassigned(Configuration conf, HRegionInfo region)
+ throws IOException {
+
+ HRegionInfo actualRegion = new HRegionInfo(region);
+
+ // Clear status in master and zk
+ clearInMaster(conf, actualRegion);
+ clearInZK(conf, actualRegion);
+
+ // Clear assignment in META or ROOT
+ clearAssignment(conf, actualRegion);
+ }
+
+ private static void clearInMaster(Configuration conf, HRegionInfo region)
+ throws IOException {
+ System.out.println("Region being cleared in master: " + region);
+ HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
+ long masterVersion =
+ master.getProtocolVersion("org.apache.hadoop.hbase.ipc.HMasterInterface", 25);
+ System.out.println("Master protocol version: " + masterVersion);
+ try {
+ // TODO: Do we want to do it this way?
+ // Better way is to tell master to fix the issue itself?
+ // That way it can use in-memory state to determine best plan
+// master.clearFromTransition(region);
+ } catch (Exception e) {}
+ }
+
+ private static void clearInZK(Configuration conf, HRegionInfo region)
+ throws IOException {
+ ZooKeeperWatcher zkw =
+ HConnectionManager.getConnection(conf).getZooKeeperWatcher();
+ try {
+ ZKAssign.deleteNodeFailSilent(zkw, region);
+ } catch (KeeperException e) {
+ throw new IOException("Unexpected ZK exception", e);
+ }
+ }
+
+ private static void closeRegion(Configuration conf, HServerAddress server,
+ HRegionInfo region)
+ throws IOException {
+ HRegionInterface rs =
+ HConnectionManager.getConnection(conf).getHRegionConnection(server);
+ rs.closeRegion(region, false);
+ }
+
+ private static void clearAssignment(Configuration conf,
+ HRegionInfo region)
+ throws IOException {
+ HTable ht = null;
+ if (region.isMetaTable()) {
+ // Clear assignment in ROOT
+ ht = new HTable(conf, HConstants.ROOT_TABLE_NAME);
+ }
+ else {
+ // Clear assignment in META
+ ht = new HTable(conf, HConstants.META_TABLE_NAME);
+ }
+ Delete del = new Delete(region.getRegionName());
+ del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+ del.deleteColumns(HConstants.CATALOG_FAMILY,
+ HConstants.STARTCODE_QUALIFIER);
+ ht.delete(del);
+ }
+}
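
A short verification sketch (not part of the patch): after clearAssignment() above deletes the info:server and info:serverstartcode columns, the region's row in .META. should look unassigned. The class name CheckUnassigned and the command-line argument are hypothetical.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CheckUnassigned {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
        // full region name of the row to inspect (passed in for illustration)
        byte[] regionName = Bytes.toBytes(args[0]);
        Result r = meta.get(new Get(regionName));
        boolean unassigned =
            r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER) == null &&
            r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER) == null;
        System.out.println("unassigned in .META.: " + unassigned);
      }
    }
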