Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -1509,7 +1509,7 @@
getOutboundMsgs().add(new HMsg(HMsg.Type.MSG_REPORT_PROCESS_OPEN, hri));
}
- protected void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted)
+ public void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted)
throws IOException {
RSZookeeperUpdater zkUpdater = null;
if(reportWhenCompleted) {
@@ -2549,4 +2549,4 @@
public int getNumberOfOnlineRegions() {
return onlineRegions.size();
}
-}
\ No newline at end of file
+}
Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -1401,4 +1401,10 @@
public static void main(String [] args) {
doMain(args, HMaster.class);
}
+
+ @Override
+ public void clearFromTransition(HRegionInfo region) {
+ this.regionManager.clearFromInTransition(region.getRegionName());
+ LOG.info("Cleared region " + region + " from transition map");
+ }
}
Index: src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HBaseRPCProtocolVersion.java (working copy)
@@ -76,7 +76,8 @@
*
Version 22: HBASE-2209. Added List support to RPC
* Version 23: HBASE-2066, multi-put.
* Version 24: HBASE-2473, create table with regions.
+ * Version 25: HBASE-2819, HBCK changes to master and RS
*
*/
-  public static final long versionID = 24L;
+  public static final long versionID = 25L;
Index: src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (working copy)
@@ -22,6 +22,7 @@
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.io.Writable;
@@ -128,4 +129,10 @@
* @return status object
*/
public ClusterStatus getClusterStatus();
+
+ /**
+ * Clears the specified region from being in transition. Used by HBaseFsck.
+ * @param region region to clear from transition map
+ */
+ public void clearFromTransition(HRegionInfo region);
}
Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (working copy)
@@ -298,4 +298,12 @@
*/
public void replicateLogEntries(HLog.Entry[] entries) throws IOException;
+ /**
+ * Closes the specified region.
+ * @param hri region to be closed
+ * @param reportWhenCompleted whether to report to master
+ * @throws IOException
+ */
+ public void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted)
+ throws IOException;
}
Index: src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java (working copy)
@@ -86,6 +86,28 @@
MetaScannerVisitor visitor, byte[] tableName, byte[] row,
int rowLimit)
throws IOException {
+ metaScan(configuration, visitor, HConstants.META_TABLE_NAME, tableName, row, rowLimit);
+ }
+
+ /**
+ * Scans the meta table and calls a visitor on each RowResult. Uses a table
+ * name and a row name to locate meta regions. It scans at most
+ * rowLimit rows.
+ *
+ * @param configuration HBase configuration.
+ * @param visitor Visitor object.
+ * @param metaTableName Meta table name (usually .META.)
+ * @param tableName User table name.
+ * @param row Name of the row at the user table. The scan will start from
+ * the region row where the row resides.
+ * @param rowLimit Max of processed rows. If it is less than 0, it
+ * will be set to default value Integer.MAX_VALUE.
+ * @throws IOException e
+ */
+ public static void metaScan(Configuration configuration, MetaScannerVisitor visitor,
+ byte[] metaTableName, byte[] tableName, byte[] row,
+ int rowLimit)
+ throws IOException {
int rowUpperLimit = rowLimit > 0 ? rowLimit: Integer.MAX_VALUE;
HConnection connection = HConnectionManager.getConnection(configuration);
@@ -99,7 +121,7 @@
HRegionInfo.createRegionName(tableName, row, HConstants.NINES,
false);
- HTable metaTable = new HTable(configuration, HConstants.META_TABLE_NAME);
+ HTable metaTable = new HTable(configuration, metaTableName);
Result startRowResult = metaTable.getRowOrBefore(searchRow,
HConstants.CATALOG_FAMILY);
if (startRowResult == null) {
@@ -132,7 +154,7 @@
configuration.getInt("hbase.meta.scanner.caching", 100));
do {
final Scan scan = new Scan(startRow).addFamily(HConstants.CATALOG_FAMILY);
- callable = new ScannerCallable(connection, HConstants.META_TABLE_NAME,
+ callable = new ScannerCallable(connection, metaTableName,
scan);
// Open scanner
connection.getRegionServerWithRetries(callable);
Index: src/main/java/org/apache/hadoop/hbase/client/HBaseFsck.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/HBaseFsck.java (revision 999137)
+++ src/main/java/org/apache/hadoop/hbase/client/HBaseFsck.java (working copy)
@@ -23,6 +23,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
@@ -37,36 +38,42 @@
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
-import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+
/**
* Check consistency among the in-memory states of the master and the
* region server(s) and the state of data in HDFS.
*/
-public class HBaseFsck extends HBaseAdmin {
+public class HBaseFsck {
public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
private Configuration conf;
- private FileSystem fs;
- private Path rootDir;
private ClusterStatus status;
- private HMasterInterface master;
private HConnection connection;
- private TreeMap metaEntries;
+ private TreeMap regionInfo = new TreeMap();
+ private TreeMap tablesInfo = new TreeMap();
+ ErrorReporter errors = new PrintingErrorReporter();
- private boolean details = false; // do we display the full report?
+ private static boolean details = false; // do we display the full report
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
+ private boolean fix = false; // do we want to try fixing the errors?
+ private boolean rerun = false; // if we tried to fix something rerun hbck
+ private static boolean summary = false; // if we want to print less output
/**
* Constructor
@@ -76,19 +83,12 @@
*/
public HBaseFsck(Configuration conf)
throws MasterNotRunningException, IOException {
- super(conf);
this.conf = conf;
- // setup filesystem properties
- this.rootDir = new Path(conf.get(HConstants.HBASE_DIR));
- this.fs = rootDir.getFileSystem(conf);
-
-
// fetch information from master
- master = getMaster();
- status = master.getClusterStatus();
- connection = getConnection();
- this.metaEntries = new TreeMap();
+ HBaseAdmin admin = new HBaseAdmin(conf);
+ status = admin.getMaster().getClusterStatus();
+ connection = admin.getConnection();
}
/**
@@ -97,24 +97,41 @@
* @return 0 on success, non-zero on failure
*/
int doWork() throws IOException {
+
// print hbase server version
- System.out.println("Version: " + status.getHBaseVersion());
+ errors.print("Version: " + status.getHBaseVersion());
+ // Make sure regionInfo is empty before starting
+ regionInfo.clear();
+ tablesInfo.clear();
+
// get a list of all regions from the master. This involves
// scanning the META table
- getMetaEntries(metaEntries);
+ if (!recordRootRegion()) {
+ // Will remove later if we can fix it
+ errors.reportError("Encountered fatal error. Exiting...");
+ return -1;
+ }
+ getMetaEntries();
+ // Check if .META. is found only once and on the right place
+ if (!checkMetaEntries()) {
+ // Will remove later if we can fix it
+ errors.reportError("Encountered fatal error. Exiting...");
+ return -1;
+ }
+
// get a list of all tables that have not changed recently.
AtomicInteger numSkipped = new AtomicInteger(0);
- HTableDescriptor[] allTables = getTables(metaEntries, numSkipped);
- System.out.println("Number of Tables: " + allTables.length);
+ HTableDescriptor[] allTables = getTables(numSkipped);
+ errors.print("Number of Tables: " + allTables.length);
if (details) {
if (numSkipped.get() > 0) {
- System.out.println("\n Number of Tables in flux: " + numSkipped.get());
+ errors.detail("\n Number of Tables in flux: " + numSkipped.get());
}
for (HTableDescriptor td : allTables) {
String tableName = td.getNameAsString();
- System.out.println("\t Table: " + tableName + "\t" +
+ errors.detail("\t Table: " + tableName + "\t" +
(td.isReadOnly() ? "ro" : "rw") + "\t" +
(td.isRootRegion() ? "ROOT" :
(td.isMetaRegion() ? "META" : " ")) + "\t" +
@@ -124,221 +141,375 @@
// From the master, get a list of all known live region servers
Collection regionServers = status.getServerInfo();
- System.out.println("Number of live region servers:" +
+ errors.print("Number of live region servers:" +
regionServers.size());
if (details) {
for (HServerInfo rsinfo: regionServers) {
- System.out.println("\t RegionServer:" + rsinfo.getServerName());
+ errors.detail("\t RegionServer:" + rsinfo.getServerName());
}
}
// From the master, get a list of all dead region servers
Collection deadRegionServers = status.getDeadServerNames();
- System.out.println("Number of dead region servers:" +
+ errors.print("Number of dead region servers:" +
deadRegionServers.size());
if (details) {
for (String name: deadRegionServers) {
- System.out.println("\t RegionServer(dead):" + name);
+ errors.detail("\t RegionServer(dead):" + name);
}
}
- // process information from all region servers
- boolean status1 = processRegionServers(regionServers);
+ // Determine what's deployed
+ processRegionServers(regionServers);
- // match HDFS with META
- boolean status2 = checkHdfs();
+ // Determine what's on HDFS
+ checkHdfs();
- if (status1 == true && status2 == true) {
- System.out.println("\nRest easy, buddy! HBase is clean. ");
- return 0;
- } else {
- System.out.println("\nInconsistencies detected.");
- return -1;
- }
+ // Check consistency
+ checkConsistency();
+
+ // Check integrity
+ checkIntegrity();
+
+ // Print table summary
+ printTableSummary();
+
+ return errors.summarize();
}
/**
- * Checks HDFS and META
- * @return true if there were no errors, otherwise return false
+ * Scan HDFS for all regions, recording their information into
+ * regionInfo
*/
- boolean checkHdfs() throws IOException {
+ void checkHdfs() throws IOException {
+ Path rootDir = new Path(conf.get(HConstants.HBASE_DIR));
+ FileSystem fs = rootDir.getFileSystem(conf);
- boolean status = true; // success
-
- // make a copy of all tables in META
- TreeMap regions = new TreeMap();
- for (MetaEntry meta: metaEntries.values()) {
- regions.put(meta.getTableDesc().getNameAsString(), meta);
- }
-
// list all tables from HDFS
- TreeMap allTableDirs = new TreeMap();
+ List tableDirs = Lists.newArrayList();
+
+ boolean foundVersionFile = false;
FileStatus[] files = fs.listStatus(rootDir);
- for (int i = 0; files != null && i < files.length; i++) {
- allTableDirs.put(files[i].getPath(), files[i]);
+ for (FileStatus file : files) {
+ if (file.getPath().getName().equals(HConstants.VERSION_FILE_NAME)) {
+ foundVersionFile = true;
+ } else {
+ tableDirs.add(file);
+ }
}
-
- // verify that -ROOT-, .META directories exists.
- Path rdir = new Path(rootDir, Bytes.toString(HConstants.ROOT_TABLE_NAME));
- FileStatus ignore = allTableDirs.remove(rdir);
- if (ignore == null) {
- status = false;
- System.out.print("\nERROR: Path " + rdir + " for ROOT table does not exist.");
- }
- Path mdir = new Path(rootDir, Bytes.toString(HConstants.META_TABLE_NAME));
- ignore = allTableDirs.remove(mdir);
- if (ignore == null) {
- status = false;
- System.out.print("\nERROR: Path " + mdir + " for META table does not exist.");
- }
-
+
// verify that version file exists
- Path vfile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
- ignore = allTableDirs.remove(vfile);
- if (ignore == null) {
- status = false;
- System.out.print("\nERROR: Version file " + vfile + " does not exist.");
+ if (!foundVersionFile) {
+ errors.reportError("Version file does not exist in root dir " + rootDir);
}
- // filter out all valid regions found in the META
- for (HRegionInfo rinfo: metaEntries.values()) {
- Path tableDir = HTableDescriptor.getTableDir(rootDir,
- rinfo.getTableDesc().getName());
- // Path regionDir = HRegion.getRegionDir(tableDir, rinfo.getEncodedName());
- // if the entry exists in allTableDirs, then remove it from allTableDirs as well
- // as from the META tmp list
- FileStatus found = allTableDirs.remove(tableDir);
- if (found != null) {
- regions.remove(tableDir.getName());
- }
- }
+ // level 1: /*
+ for (FileStatus tableDir : tableDirs) {
+ String tableName = tableDir.getPath().getName();
+ // ignore hidden files
+ if (tableName.startsWith(".") &&
+ !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME)))
+ continue;
+ // level 2: //*
+ FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
+ for (FileStatus regionDir : regionDirs) {
+ String encodedName = regionDir.getPath().getName();
- // The remaining entries in allTableDirs do not have entries in .META
- // However, if the path name was modified in the last few milliseconds
- // as specified by timelag, then do not flag it as an inconsistency.
- long now = System.currentTimeMillis();
- for (FileStatus region: allTableDirs.values()) {
- if (region.getModificationTime() + timelag < now) {
- String finalComponent = region.getPath().getName();
- if (!finalComponent.startsWith(".")) {
- // ignore .logs and .oldlogs directories
- System.out.print("\nERROR: Path " + region.getPath() +
- " does not have a corresponding entry in META.");
- status = false;
+ // ignore directories that aren't hexadecimal
+ if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue;
+
+ HbckInfo hbi = getOrCreateInfo(encodedName);
+ hbi.foundRegionDir = regionDir;
+
+ // Set a flag if this region contains only edits
+ // This is special case if a region is left after split
+ hbi.onlyEdits = true;
+ FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
+ Path ePath = HLog.getRegionDirRecoveredEditsDir(regionDir.getPath());
+ for (FileStatus subDir : subDirs) {
+ String sdName = subDir.getPath().getName();
+ if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
+ hbi.onlyEdits = false;
+ break;
+ }
}
}
}
-
- // the remaining entries in tmp do not have entries in HDFS
- for (HRegionInfo rinfo: regions.values()) {
- System.out.println("\nERROR: Region " + rinfo.getRegionNameAsString() +
- " does not have a corresponding entry in HDFS.");
- status = false;
+ }
+
+ /**
+ * Record the location of the ROOT region as found in ZooKeeper,
+ * as if it were in a META table. This is so that we can check
+ * deployment of ROOT.
+ */
+ boolean recordRootRegion() throws IOException {
+ HRegionLocation rootLocation = connection.locateRegion(
+ HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
+
+ // Check if Root region is valid and existing
+ if (rootLocation == null || rootLocation.getRegionInfo() == null ||
+ rootLocation.getServerAddress() == null) {
+ errors.reportError("Root Region or some of its attributes is null.");
+ return false;
}
- return status;
+
+ MetaEntry m = new MetaEntry(rootLocation.getRegionInfo(),
+ rootLocation.getServerAddress(), null, System.currentTimeMillis());
+ HbckInfo hbInfo = new HbckInfo(m);
+ regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
+ return true;
}
+
/**
* Contacts each regionserver and fetches metadata about regions.
* @param regionServerList - the list of region servers to connect to
* @throws IOException if a remote or network exception occurs
- * @return true if there were no errors, otherwise return false
*/
- boolean processRegionServers(Collection regionServerList)
+ void processRegionServers(Collection regionServerList)
throws IOException {
- // make a copy of all entries in META
- TreeMap tmp =
- new TreeMap(metaEntries);
- long errorCount = 0; // number of inconsistencies detected
- int showProgress = 0;
-
// loop to contact each region server
- for (HServerInfo rsinfo: regionServerList) {
- showProgress++; // one more server.
+ for (HServerInfo rsinfo:regionServerList) {
+ errors.progress();
try {
HRegionInterface server = connection.getHRegionConnection(
rsinfo.getServerAddress());
// list all online regions from this region server
HRegionInfo[] regions = server.getRegionsAssignment();
+
if (details) {
- System.out.print("\nRegionServer:" + rsinfo.getServerName() +
- " number of regions:" + regions.length);
+ errors.detail("\nRegionServer:" + rsinfo.getServerName() +
+ " number of regions:" + regions.length);
for (HRegionInfo rinfo: regions) {
- System.out.print("\n\t name:" + rinfo.getRegionNameAsString() +
- " id:" + rinfo.getRegionId() +
- " encoded name:" + rinfo.getEncodedName() +
- " start :" + Bytes.toStringBinary(rinfo.getStartKey()) +
- " end :" + Bytes.toStringBinary(rinfo.getEndKey()));
+ errors.detail("\n\t name:" + rinfo.getRegionNameAsString() +
+ " id:" + rinfo.getRegionId() +
+ " encoded name:" + rinfo.getEncodedName() +
+ " start :" + Bytes.toStringBinary(rinfo.getStartKey()) +
+ " end :" + Bytes.toStringBinary(rinfo.getEndKey()));
}
- showProgress = 0;
}
// check to see if the existance of this region matches the region in META
- for (HRegionInfo r: regions) {
- MetaEntry metaEntry = metaEntries.get(r);
-
- // this entry exists in the region server but is not in the META
- if (metaEntry == null) {
- if (r.isMetaRegion()) {
- continue; // this is ROOT or META region
- }
- System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
- " found on server " + rsinfo.getServerAddress() +
- " but is not listed in META.");
- errorCount++;
- showProgress = 0;
- continue;
- }
- if (!metaEntry.regionServer.equals(rsinfo.getServerAddress())) {
- System.out.print("\nERROR: Region " + r.getRegionNameAsString() +
- " found on server " + rsinfo.getServerAddress() +
- " but is listed in META to be on server " +
- metaEntry.regionServer);
- errorCount++;
- showProgress = 0;
- }
-
- // The region server is indeed serving a valid region. Remove it from tmp
- tmp.remove(r);
+ for (HRegionInfo r:regions) {
+ HbckInfo hbi = getOrCreateInfo(r.getEncodedName());
+ hbi.deployedOn.add(rsinfo.getServerAddress());
}
} catch (IOException e) { // unable to connect to the region server.
- if (details) {
- System.out.print("\nRegionServer:" + rsinfo.getServerName() +
- " Unable to fetch region information. " + e);
- }
+ errors.reportError("RegionServer: " + rsinfo.getServerName() +
+ " Unable to fetch region information. " + e);
}
- if (showProgress % 10 == 0) {
- System.out.print("."); // show progress to user
- showProgress = 0;
+ }
+ }
+
+ /**
+ * Check consistency of all regions that have been found in previous phases.
+ */
+ void checkConsistency() throws IOException {
+ for (HbckInfo hbi : regionInfo.values()) {
+ doConsistencyCheck(hbi);
+ }
+ }
+
+ /**
+ * Check a single region for consistency and correct deployment.
+ */
+ void doConsistencyCheck(HbckInfo hbi) throws IOException {
+ String descriptiveName = hbi.toString();
+
+ boolean inMeta = hbi.metaEntry != null;
+ boolean inHdfs = hbi.foundRegionDir != null;
+ boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
+ boolean isDeployed = !hbi.deployedOn.isEmpty();
+ boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
+ boolean deploymentMatchesMeta =
+ hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
+ hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
+ boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline();
+ boolean recentlyModified = hbi.foundRegionDir != null &&
+ hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
+
+ // ========== First the healthy cases =============
+ if (hbi.onlyEdits) {
+ return;
+ }
+ if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
+ return;
+ } else if (inMeta && !shouldBeDeployed && !isDeployed) {
+ // offline regions shouldn't cause complaints
+ LOG.debug("Region " + descriptiveName + " offline, ignoring.");
+ return;
+ } else if (recentlyModified) {
+ LOG.info("Region " + descriptiveName + " was recently modified -- skipping");
+ return;
+ }
+ // ========== Cases where the region is not in META =============
+ else if (!inMeta && !inHdfs && !isDeployed) {
+ // We shouldn't have record of this region at all then!
+ assert false : "Entry for region with no data";
+ } else if (!inMeta && !inHdfs && isDeployed) {
+ errors.reportError("Region " + descriptiveName + " not on HDFS or in META but " +
+ "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+ } else if (!inMeta && inHdfs && !isDeployed) {
+ errors.reportError("Region " + descriptiveName + " on HDFS, but not listed in META " +
+ "or deployed on any region server.");
+ } else if (!inMeta && inHdfs && isDeployed) {
+ errors.reportError("Region " + descriptiveName + " not in META, but deployed on " +
+ Joiner.on(", ").join(hbi.deployedOn));
+
+ // ========== Cases where the region is in META =============
+ } else if (inMeta && !inHdfs && !isDeployed) {
+ errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS " +
+ "or deployed on any region server.");
+ } else if (inMeta && !inHdfs && isDeployed) {
+ errors.reportError("Region " + descriptiveName + " found in META, but not in HDFS, " +
+ "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+ } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
+ errors.reportError("Region " + descriptiveName + " not deployed on any region server.");
+ // If we are trying to fix the errors
+ if (shouldFix()) {
+ errors.print("Trying to fix unassigned region...");
+ setShouldRerun();
+ HBaseFsckRepair.fixUnassigned(this.conf, hbi.metaEntry);
}
+ } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
+ errors.reportError("Region " + descriptiveName + " should not be deployed according " +
+ "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
+ } else if (inMeta && inHdfs && isMultiplyDeployed) {
+ errors.reportError("Region " + descriptiveName + " is listed in META on region server " +
+ hbi.metaEntry.regionServer + " but is multiply assigned to region servers " +
+ Joiner.on(", ").join(hbi.deployedOn));
+ // If we are trying to fix the errors
+ if (shouldFix()) {
+ errors.print("Trying to fix assignment error...");
+ setShouldRerun();
+ HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
+ }
+ } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
+ errors.reportError("Region " + descriptiveName + " listed in META on region server " +
+ hbi.metaEntry.regionServer + " but found on region server " +
+ hbi.deployedOn.get(0));
+ // If we are trying to fix the errors
+ if (shouldFix()) {
+ errors.print("Trying to fix assignment error...");
+ setShouldRerun();
+ HBaseFsckRepair.fixDupeAssignment(this.conf, hbi.metaEntry, hbi.deployedOn);
+ }
+ } else {
+ errors.reportError("Region " + descriptiveName + " is in an unforeseen state:" +
+ " inMeta=" + inMeta +
+ " inHdfs=" + inHdfs +
+ " isDeployed=" + isDeployed +
+ " isMultiplyDeployed=" + isMultiplyDeployed +
+ " deploymentMatchesMeta=" + deploymentMatchesMeta +
+ " shouldBeDeployed=" + shouldBeDeployed);
}
+ }
- // all the region left in tmp are not found on any region server
- for (MetaEntry metaEntry: tmp.values()) {
- // An offlined region will not be present out on a regionserver. A region
- // is offlined if table is offlined -- will still have an entry in .META.
- // of a region is offlined because its a parent region and its daughters
- // still have references.
- if (metaEntry.isOffline()) continue;
- System.out.print("\nERROR: Region " + metaEntry.getRegionNameAsString() +
- " is not served by any region server " +
- " but is listed in META to be on server " +
- metaEntry.regionServer);
- errorCount++;
+ /**
+ * Checks tables integrity. Goes over all regions and scans the tables.
+ * Collects all the pieces for each table and checks if there are missing,
+ * repeated or overlapping ones.
+ */
+ void checkIntegrity() {
+ for (HbckInfo hbi : regionInfo.values()) {
+ // Check only valid, working regions
+ if (hbi.metaEntry == null) continue;
+ if (hbi.metaEntry.regionServer == null) continue;
+ if (hbi.foundRegionDir == null) continue;
+ if (hbi.deployedOn.size() != 1) continue;
+ if (hbi.onlyEdits) continue;
+
+ // We should be safe here
+ String tableName = hbi.metaEntry.getTableDesc().getNameAsString();
+ TInfo modTInfo = tablesInfo.get(tableName);
+ if (modTInfo == null) {
+ modTInfo = new TInfo(tableName);
+ }
+ for (HServerAddress server : hbi.deployedOn) {
+ modTInfo.addServer(server);
+ }
+ modTInfo.addEdge(hbi.metaEntry.getStartKey(), hbi.metaEntry.getEndKey());
+ tablesInfo.put(tableName, modTInfo);
}
+
+ for (TInfo tInfo : tablesInfo.values()) {
+ if (!tInfo.check()) {
+ errors.reportError("Found inconsistency in table " + tInfo.getName());
+ }
+ }
+ }
- if (errorCount > 0) {
- System.out.println("\nDetected " + errorCount + " inconsistencies. " +
- "This might not indicate a real problem because these regions " +
- "could be in the midst of a split. Consider re-running with a " +
- "larger value of -timelag.");
- return false;
+ /**
+ * Maintain information about a particular table.
+ */
+ private class TInfo {
+ String tableName;
+ TreeMap edges;
+ TreeSet deployedOn;
+
+ TInfo(String name) {
+ this.tableName = name;
+ edges = new TreeMap (Bytes.BYTES_COMPARATOR);
+ deployedOn = new TreeSet ();
}
- return true; // no errors
+
+ public void addEdge(byte[] fromNode, byte[] toNode) {
+ this.edges.put(fromNode, toNode);
+ }
+
+ public void addServer(HServerAddress server) {
+ this.deployedOn.add(server);
+ }
+
+ public String getName() {
+ return tableName;
+ }
+
+ public int getNumRegions() {
+ return edges.size();
+ }
+
+ public boolean check() {
+ byte[] last = new byte[0];
+ byte[] next = new byte[0];
+ TreeSet visited = new TreeSet(Bytes.BYTES_COMPARATOR);
+ // Each table should start with a zero-length byte[] and end at a
+ // zero-length byte[]. Just follow the edges to see if this is true
+ while (true) {
+ // Check if chain is broken
+ if (!edges.containsKey(last)) {
+ errors.detail("Chain of regions in table " + tableName +
+ " is broken.");
+ return false;
+ }
+ next = edges.get(last);
+ // Found a cycle
+ if (visited.contains(next)) {
+ errors.detail("Chain of regions in table " + tableName +
+ " has a cycle.");
+ return false;
+ }
+ // Mark next node as visited
+ visited.add(next);
+ // If next is zero-length byte[] we are possibly at the end of the chain
+ if (next.length == 0) {
+ // If we have visited all elements we are fine
+ if (edges.size() != visited.size()) {
+ errors.detail("Chain of regions in table " + tableName +
+ " contains less elements than are listed in META.");
+ return false;
+ }
+ return true;
+ }
+ last = next;
+ }
+ // How did we get here?
+ }
}
+
/**
* Return a list of table names whose metadata have not been modified in the
* last few milliseconds specified by timelag
@@ -349,18 +520,17 @@
* @return tables that have not been modified recently
* @throws IOException if an error is encountered
*/
- HTableDescriptor[] getTables(final TreeMap regionList,
- AtomicInteger numSkipped) {
+ HTableDescriptor[] getTables(AtomicInteger numSkipped) {
TreeSet uniqueTables = new TreeSet();
long now = System.currentTimeMillis();
- for (MetaEntry m: regionList.values()) {
- HRegionInfo info = m;
+ for (HbckInfo hbi : regionInfo.values()) {
+ MetaEntry info = hbi.metaEntry;
// if the start key is zero, then we have found the first region of a table.
// pick only those tables that were not modified in the last few milliseconds.
if (info != null && info.getStartKey().length == 0) {
- if (m.modTime + timelag < now) {
+ if (info.modTime + timelag < now) {
uniqueTables.add(info.getTableDesc());
} else {
numSkipped.incrementAndGet(); // one more in-flux table
@@ -371,71 +541,145 @@
}
/**
- * Scan META. Returns a list of all regions of all known tables.
- * @param regionList - fill up all entries found in .META
- * @throws IOException if an error is encountered
+   * Gets the entry in regionInfo corresponding to the given encoded
+ * region name. If the region has not been seen yet, a new entry is added
+ * and returned.
*/
- void getMetaEntries(final TreeMap regionList) throws IOException {
- MetaScannerVisitor visitor = new MetaScannerVisitor() {
- int countRecord = 1;
+ private HbckInfo getOrCreateInfo(String name) {
+ HbckInfo hbi = regionInfo.get(name);
+ if (hbi == null) {
+ hbi = new HbckInfo(null);
+ regionInfo.put(name, hbi);
+ }
+ return hbi;
+ }
- // comparator to sort KeyValues with latest modtime
- final Comparator comp = new Comparator() {
- public int compare(KeyValue k1, KeyValue k2) {
- return (int)(k1.getTimestamp() - k2.getTimestamp());
+ /**
+ * Check values in regionInfo for .META.
+ * Check if zero or more than one regions with META are found.
+ * If there are inconsistencies (i.e. zero or more than one regions
+ * pretend to be holding the .META.) try to fix that and report an error.
+ * @throws IOException from HBaseFsckRepair functions
+ */
+ boolean checkMetaEntries() throws IOException {
+ List metaRegions = Lists.newArrayList();
+ for (HbckInfo value : regionInfo.values()) {
+      if (value.metaEntry != null && value.metaEntry.isMetaTable()) {
+ metaRegions.add(value);
+ }
+ }
+
+ // If something is wrong
+ if (metaRegions.size() != 1) {
+ HRegionLocation rootLocation = connection.locateRegion(
+ HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW);
+ HbckInfo root =
+ regionInfo.get(rootLocation.getRegionInfo().getEncodedName());
+
+ // If there is no region holding .META.
+ if (metaRegions.size() == 0) {
+ errors.reportError(".META. is not found on any region.");
+ if (shouldFix()) {
+ errors.print("Trying to fix a problem with .META...");
+ setShouldRerun();
+ // try to fix it (treat it as unassigned region)
+ HBaseFsckRepair.fixUnassigned(conf, root.metaEntry);
+ }
+ }
+ // If there are more than one regions pretending to hold the .META.
+ else if (metaRegions.size() > 1) {
+ errors.reportError(".META. is found on more than one region.");
+ if (shouldFix()) {
+ errors.print("Trying to fix a problem with .META...");
+ setShouldRerun();
+ // try fix it (treat is a dupe assignment)
+ List deployedOn = Lists.newArrayList();
+ for (HbckInfo mRegion : metaRegions) {
+ deployedOn.add(mRegion.metaEntry.regionServer);
}
- };
+ HBaseFsckRepair.fixDupeAssignment(conf, root.metaEntry, deployedOn);
+ }
+ }
+ // rerun hbck with hopefully fixed META
+ return false;
+ }
+ // no errors, so continue normally
+ return true;
+ }
- public boolean processRow(Result result) throws IOException {
- try {
+ /**
+ * Scan .META. and -ROOT-, adding all regions found to the regionInfo map.
+ * @throws IOException if an error is encountered
+ */
+ void getMetaEntries() throws IOException {
+ MetaScannerVisitor visitor = new MetaScannerVisitor() {
+ int countRecord = 1;
- // record the latest modification of this META record
- long ts = Collections.max(result.list(), comp).getTimestamp();
+ // comparator to sort KeyValues with latest modtime
+ final Comparator comp = new Comparator() {
+ public int compare(KeyValue k1, KeyValue k2) {
+          return k1.getTimestamp() < k2.getTimestamp() ? -1 : (k1.getTimestamp() > k2.getTimestamp() ? 1 : 0);
+ }
+ };
- // record region details
- byte[] value = result.getValue(HConstants.CATALOG_FAMILY,
- HConstants.REGIONINFO_QUALIFIER);
- HRegionInfo info = null;
- HServerAddress server = null;
- byte[] startCode = null;
- if (value != null) {
- info = Writables.getHRegionInfo(value);
- }
+ public boolean processRow(Result result) throws IOException {
+ try {
- // record assigned region server
- value = result.getValue(HConstants.CATALOG_FAMILY,
- HConstants.SERVER_QUALIFIER);
- if (value != null && value.length > 0) {
- String address = Bytes.toString(value);
- server = new HServerAddress(address);
- }
+ // record the latest modification of this META record
+ long ts = Collections.max(result.list(), comp).getTimestamp();
- // record region's start key
- value = result.getValue(HConstants.CATALOG_FAMILY,
- HConstants.STARTCODE_QUALIFIER);
- if (value != null) {
- startCode = value;
- }
- MetaEntry m = new MetaEntry(info, server, startCode, ts);
- m = regionList.put(m ,m);
- if (m != null) {
- throw new IOException("Two entries in META are same " + m);
- }
+ // record region details
+ byte[] value = result.getValue(HConstants.CATALOG_FAMILY,
+ HConstants.REGIONINFO_QUALIFIER);
+ HRegionInfo info = null;
+ HServerAddress server = null;
+ byte[] startCode = null;
+ if (value != null) {
+ info = Writables.getHRegionInfo(value);
+ }
- // show proof of progress to the user, once for every 100 records.
- if (countRecord % 100 == 0) {
- System.out.print(".");
- }
- countRecord++;
- return true;
- } catch (RuntimeException e) {
- LOG.error("Result=" + result);
- throw e;
+ // record assigned region server
+ value = result.getValue(HConstants.CATALOG_FAMILY,
+ HConstants.SERVER_QUALIFIER);
+ if (value != null && value.length > 0) {
+ String address = Bytes.toString(value);
+ server = new HServerAddress(address);
}
+
+ // record region's start key
+ value = result.getValue(HConstants.CATALOG_FAMILY,
+ HConstants.STARTCODE_QUALIFIER);
+ if (value != null) {
+ startCode = value;
+ }
+          if (info == null) return true; // row has no REGIONINFO; skip it
+          MetaEntry m = new MetaEntry(info, server, startCode, ts);
+          HbckInfo hbInfo = new HbckInfo(m);
+          HbckInfo previous = regionInfo.put(info.getEncodedName(), hbInfo);
+          if (previous != null)
+            throw new IOException("Two entries in META are same " + previous);
+
+ // show proof of progress to the user, once for every 100 records.
+ if (countRecord % 100 == 0) {
+ errors.progress();
+ }
+ countRecord++;
+ return true;
+ } catch (RuntimeException e) {
+ LOG.error("Result=" + result);
+ throw e;
}
- };
- MetaScanner.metaScan(conf, visitor);
- System.out.println("");
+ }
+ };
+
+ // Scan -ROOT- to pick up META regions
+ MetaScanner.metaScan(conf, visitor,
+ HConstants.ROOT_TABLE_NAME, HConstants.EMPTY_START_ROW, null,
+ Integer.MAX_VALUE);
+
+ // Scan .META. to pick up user regions
+ MetaScanner.metaScan(conf, visitor);
+ errors.print("");
}
/**
@@ -443,27 +687,159 @@
*/
private static class MetaEntry extends HRegionInfo {
HServerAddress regionServer; // server hosting this region
- byte[] startCode; // start value of region
long modTime; // timestamp of most recent modification metadata
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
byte[] startCode, long modTime) {
super(rinfo);
this.regionServer = regionServer;
- this.startCode = startCode;
this.modTime = modTime;
}
}
/**
- * Display the full report from fsck. This displays all live and dead region servers ,
- * and all known regions.
+ * Maintain information about a particular region.
*/
+ static class HbckInfo {
+ boolean onlyEdits = false;
+ MetaEntry metaEntry = null;
+ FileStatus foundRegionDir = null;
+ List deployedOn = Lists.newArrayList();
+
+ HbckInfo(MetaEntry metaEntry) {
+ this.metaEntry = metaEntry;
+ }
+
+ public String toString() {
+ if (metaEntry != null) {
+ return metaEntry.getRegionNameAsString();
+ } else if (foundRegionDir != null) {
+ return foundRegionDir.getPath().toString();
+ } else {
+ return "unknown region on " + Joiner.on(", ").join(deployedOn);
+ }
+ }
+ }
+
+ /**
+ * Prints summary of all tables found on the system.
+ */
+ private void printTableSummary() {
+ System.out.println("Summary:");
+ for (TInfo tInfo : tablesInfo.values()) {
+ if (tInfo.check()) {
+ System.out.println("Table " + tInfo.getName() + " is okay.");
+ }
+ else {
+ System.out.println("Table " + tInfo.getName() + " is inconsistent.");
+ }
+ System.out.println(" -- number of regions: " + tInfo.getNumRegions());
+ System.out.print(" -- deployed on:");
+ for (HServerAddress server : tInfo.deployedOn) {
+ System.out.print(" " + server.toString());
+ }
+ System.out.println("\n");
+ }
+ }
+
+ interface ErrorReporter {
+ public void reportError(String message);
+ public int summarize();
+ public void detail(String details);
+ public void progress();
+ public void print(String message);
+ }
+
+ private static class PrintingErrorReporter implements ErrorReporter {
+ public int errorCount = 0;
+ private int showProgress;
+
+ public void reportError(String message) {
+ if (!summary) {
+ System.out.println("ERROR: " + message);
+ }
+ errorCount++;
+ showProgress = 0;
+ }
+
+ public int summarize() {
+ System.out.println(Integer.toString(errorCount) +
+ " inconsistencies detected.");
+ if (errorCount == 0) {
+ System.out.println("Status: OK");
+ return 0;
+ } else {
+ System.out.println("Status: INCONSISTENT");
+ return -1;
+ }
+ }
+
+ public void print(String message) {
+ if (!summary) {
+ System.out.println(message);
+ }
+ }
+
+ public void detail(String message) {
+ if (details) {
+ System.out.println(message);
+ }
+ showProgress = 0;
+ }
+
+ public void progress() {
+ if (showProgress++ == 10) {
+ if (!summary) {
+ System.out.print(".");
+ }
+ showProgress = 0;
+ }
+ }
+ }
+
+ /**
+ * Display the full report from fsck.
+ * This displays all live and dead region servers, and all known regions.
+ */
void displayFullReport() {
details = true;
}
/**
+ * Set summary mode.
+ * Print only summary of the tables and status (OK or INCONSISTENT)
+ */
+ void setSummary() {
+ summary = true;
+ }
+
+ /**
+ * Check if we should rerun fsck again. This checks if we've tried to
+ * fix something and we should rerun fsck tool again.
+   * When a fix has been applied, hbck is re-executed once more so the
+   * repaired state can be re-verified.
+ */
+ void setShouldRerun() {
+ rerun = true;
+ }
+
+ boolean shouldRerun() {
+ return rerun;
+ }
+
+ /**
+ * Fix inconsistencies found by fsck. This should try to fix errors (if any)
+ * found by fsck utility.
+ */
+ void setFixErrors() {
+ fix = true;
+ }
+
+ boolean shouldFix() {
+ return fix;
+ }
+
+ /**
* We are interested in only those tables that have not changed their state in
* META during the last few seconds specified by hbase.admin.fsck.timelag
* @param seconds - the time in seconds
@@ -479,6 +855,8 @@
System.err.println(" -timelag {timeInSeconds} Process only regions that " +
" have not experienced any metadata updates in the last " +
" {{timeInSeconds} seconds.");
+ System.err.println(" -fix Try to fix some of the errors.");
+ System.err.println(" -summary Print only summary of the tables and status.");
Runtime.getRuntime().exit(-2);
}
@@ -512,6 +890,10 @@
printUsageAndExit();
}
i++;
+ } else if (cmd.equals("-fix")) {
+ fsck.setFixErrors();
+ } else if (cmd.equals("-summary")) {
+ fsck.setSummary();
} else {
String str = "Unknown command line option : " + cmd;
LOG.info(str);
@@ -521,6 +903,14 @@
}
// do the real work of fsck
int code = fsck.doWork();
+ // If we have changed the HBase state it is better to run fsck again
+ // to see if we haven't broken something else in the process.
+ // We run it only once more because otherwise we can easily fall into
+ // an infinite loop.
+ if (fsck.shouldRerun()) {
+ code = fsck.doWork();
+ }
+
Runtime.getRuntime().exit(code);
}
}
Index: src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java (revision 0)
+++ src/main/java/org/apache/hadoop/hbase/client/HBaseFsckRepair.java (revision 0)
@@ -0,0 +1,114 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.ipc.HMasterInterface;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
+import org.apache.zookeeper.KeeperException;
+
+public class HBaseFsckRepair {
+
+ public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
+ List servers)
+ throws IOException {
+
+ HRegionInfo actualRegion = new HRegionInfo(region);
+
+ // Clear status in master and zk
+ clearInMaster(conf, actualRegion);
+ clearInZK(conf, actualRegion);
+
+ // Close region on the servers
+ for(HServerAddress server : servers) {
+ closeRegion(conf, server, actualRegion);
+ }
+
+ // It's unassigned so fix it as such
+ fixUnassigned(conf, actualRegion);
+ }
+
+ public static void fixUnassigned(Configuration conf, HRegionInfo region)
+ throws IOException {
+
+ HRegionInfo actualRegion = new HRegionInfo(region);
+
+ // Clear status in master and zk
+ clearInMaster(conf, actualRegion);
+ clearInZK(conf, actualRegion);
+
+ // Clear assignment in META or ROOT
+ clearAssignment(conf, actualRegion);
+ }
+
+ private static void clearInMaster(Configuration conf, HRegionInfo region)
+ throws IOException {
+    System.out.println("Region being cleared in master: " + region);
+    HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
+    long masterVersion = master.getProtocolVersion(
+        HMasterInterface.class.getName(), HMasterInterface.versionID);
+    System.out.println("Master protocol version: " + masterVersion);
+    try {
+      master.clearFromTransition(region);
+    } catch (Exception e) { System.out.println("Error clearing in-transition state for " + region + ": " + e); }
+ }
+
+ private static void clearInZK(Configuration conf, HRegionInfo region)
+ throws IOException {
+    ZooKeeperWrapper zkw = HConnectionManager.getConnection(conf).getZooKeeperWrapper();
+    // Remove the unassigned znode so the region is no longer considered
+    // to be in transition.
+    zkw.deleteUnassignedRegion(region.getEncodedName());
+ }
+
+ private static void closeRegion(Configuration conf, HServerAddress server,
+ HRegionInfo region)
+ throws IOException {
+ HRegionInterface rs =
+ HConnectionManager.getConnection(conf).getHRegionConnection(server);
+ rs.closeRegion(region, false);
+ }
+
+ private static void clearAssignment(Configuration conf,
+ HRegionInfo region)
+ throws IOException {
+ HTable ht = null;
+ if (region.isMetaTable()) {
+ // Clear assignment in ROOT
+ ht = new HTable(conf, HConstants.ROOT_TABLE_NAME);
+ }
+ else {
+ // Clear assignment in META
+ ht = new HTable(conf, HConstants.META_TABLE_NAME);
+ }
+ Delete del = new Delete(region.getRegionName());
+ del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+ del.deleteColumns(HConstants.CATALOG_FAMILY,
+ HConstants.STARTCODE_QUALIFIER);
+ ht.delete(del);
+ }
+}