From a0eff40fbba60dc63ced52f22d0cf49b9e7ccee1 Mon Sep 17 00:00:00 2001
From: Umesh Agashe
Date: Fri, 25 May 2018 11:07:39 -0700
Subject: [PATCH] HBASE-19121: HBCK2

---
 .../java/org/apache/hadoop/hbase/util/FSUtils.java |  24 +-
 .../org/apache/hadoop/hbase/util/HBaseFsck.java    | 766 ++++++---------------
 .../org/apache/hadoop/hbase/util/HBaseFsck2.java   | 421 +++++++++++
 .../apache/hadoop/hbase/util/HBaseFsckUtil.java    | 474 +++++++++++++
 .../hbase/util/hbck/HFileCorruptionChecker.java    |  41 +-
 .../hadoop/hbase/util/hbck/ReplicationChecker.java |  12 +-
 .../hadoop/hbase/client/TestMetaWithReplicas.java  |   2 +-
 .../hadoop/hbase/util/BaseTestHBaseFsck.java       |  32 +-
 .../apache/hadoop/hbase/util/TestHBaseFsck2.java   | 135 ++++
 .../hbase/util/TestHBaseFsckReplication.java       |   2 +-
 .../hadoop/hbase/util/hbck/HbckTestingUtil.java    |   8 +-
 11 files changed, 1305 insertions(+), 612 deletions(-)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck2.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckUtil.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck2.java

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
index 53db140b28da72254103dd2267eff1c71dbe9e4c..f135e8ce2f6e1aa69ac28575de91a93be04a2d31 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
@@ -74,7 +74,7 @@ import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.security.AccessDeniedException;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
+import org.apache.hadoop.hbase.util.HBaseFsckUtil.Reporter;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSHedgedReadMetrics;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -1199,7 +1199,7 @@ public abstract class FSUtils extends CommonFSUtils {
    * @param tableName name of the table to scan.
    * @param sfFilter optional path filter to apply to store files
    * @param executor optional executor service to parallelize this operation
-   * @param errors ErrorReporter instance or null
+   * @param reporter HBaseFsckUtil.Reporter instance or null
   * @return Map keyed by StoreFile name with a value of the full Path.
   * @throws IOException When scanning the directory fails.
   * @throws InterruptedException
   */
  public static Map<String, Path> getTableStoreFilePathMap(
      Map<String, Path> resultMap, final FileSystem fs, final Path hbaseRootDir,
      TableName tableName, final PathFilter sfFilter,
-     ExecutorService executor, final ErrorReporter errors)
+     ExecutorService executor, final Reporter reporter)
      throws IOException, InterruptedException {
    final Map<String, Path> finalResultMap = resultMap == null ?
        new ConcurrentHashMap<>(128, 0.75f, 32) : resultMap;
@@ -1228,8 +1228,8 @@
     final List<Future<?>> futures = new ArrayList<>(regionDirs.size());

     for (FileStatus regionDir : regionDirs) {
-      if (null != errors) {
-        errors.progress();
+      if (null != reporter) {
+        reporter.progress();
       }
       final Path dd = regionDir.getPath();
@@ -1252,8 +1252,8 @@
             return;
           }
           for (FileStatus familyDir : familyDirs) {
-            if (null != errors) {
-              errors.progress();
+            if (null != reporter) {
+              reporter.progress();
             }
             Path family = familyDir.getPath();
             if (family.getName().equals(HConstants.RECOVERED_EDITS_DIR)) {
@@ -1263,8 +1263,8 @@
             // put in map
             FileStatus[] familyStatus = fs.listStatus(family);
             for (FileStatus sfStatus : familyStatus) {
-              if (null != errors) {
-                errors.progress();
+              if (null != reporter) {
+                reporter.progress();
               }
               Path sf = sfStatus.getPath();
               if (sfFilter == null || sfFilter.accept(sf)) {
@@ -1363,14 +1363,14 @@
    * @param hbaseRootDir The root directory to scan.
    * @param sfFilter optional path filter to apply to store files
    * @param executor optional executor service to parallelize this operation
-   * @param errors ErrorReporter instance or null
+   * @param reporter HBaseFsckUtil.Reporter instance or null
    * @return Map keyed by StoreFile name with a value of the full Path.
    * @throws IOException When scanning the directory fails.
    * @throws InterruptedException
    */
  public static Map<String, Path> getTableStoreFilePathMap(
    final FileSystem fs, final Path hbaseRootDir, PathFilter sfFilter,
-    ExecutorService executor, ErrorReporter errors)
+    ExecutorService executor, Reporter reporter)
  throws IOException, InterruptedException {
    ConcurrentHashMap<String, Path> map = new ConcurrentHashMap<>(1024, 0.75f, 32);
@@ -1380,7 +1380,7 @@
    // only include the directory paths to tables
    for (Path tableDir : FSUtils.getTableDirs(fs, hbaseRootDir)) {
      getTableStoreFilePathMap(map, fs, hbaseRootDir,
-          FSUtils.getTableName(tableDir), sfFilter, executor, errors);
+          FSUtils.getTableName(tableDir), sfFilter, executor, reporter);
    }
    return map;
  }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index 820a4e059ede079a5b6b5af4c4af76b85f133ca3..a785dd693b84c530e2db64b9fb92f25fbfc0556c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -126,7 +126,7 @@ import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.security.AccessDeniedException;
 import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
-import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
+import org.apache.hadoop.hbase.util.HBaseFsckUtil.Reporter.ERROR_CODE;
 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
@@ -217,15 +217,10 @@ public class HBaseFsck extends Configured implements Closeable {
   private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
   private static final int DEFAULT_MAX_MERGE = 5;
   private static final String TO_BE_LOADED = "to_be_loaded";
-  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
-  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
-  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
-  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
   // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
   // In HADOOP-2.6 and later, the Namenode proxy now created with custom RetryPolicy for
   // AlreadyBeingCreatedException which is implies timeout on this operations up to
   // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
-  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
   private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
   private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
@@ -243,12 +238,6 @@ public class HBaseFsck extends Configured implements Closeable {
   private long startMillis = EnvironmentEdgeManager.currentTime();
   private HFileCorruptionChecker hfcc;
   private int retcode = 0;
-  private Path HBCK_LOCK_PATH;
-  private FSDataOutputStream hbckOutFd;
-  // This lock is to prevent cleanup of balancer resources twice between
-  // ShutdownHook and the main code. We cleanup only if the connect() is
-  // successful
-  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

   // Unsupported options in HBase 2.0+
   private static final Set<String> unsupportedOptionsInV2 = Sets.newHashSet("-fix",
@@ -298,7 +287,7 @@ public class HBaseFsck extends Configured implements Closeable {
   /*********
    * State
   *********/
-  final private ErrorReporter errors;
+  private HBaseFsckUtil.Reporter reporter;
   int fixes = 0;

   /**
@@ -329,7 +318,6 @@
   private Map<TableName, Set<String>> orphanTableDirs = new HashMap<>();
   private Map<TableName, TableState> tableStates = new HashMap<>();
-  private final RetryCounterFactory lockFileRetryCounterFactory;
   private final RetryCounterFactory createZNodeRetryCounterFactory;
   private Map<TableName, Set<String>> skippedRegions = new HashMap<>();
@@ -367,14 +355,8 @@
   public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
       ZooKeeperConnectionException, IOException, ClassNotFoundException {
     super(conf);
-    errors = getErrorReporter(getConf());
+    reporter = HBaseFsckUtil.getReporter(getConf());
     this.executor = exec;
-    lockFileRetryCounterFactory = new RetryCounterFactory(
-      getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
-      getConf().getInt(
-        "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
-      getConf().getInt(
-        "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
     createZNodeRetryCounterFactory = new RetryCounterFactory(
       getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
       getConf().getInt(
@@ -386,210 +368,70 @@
     zkw = createZooKeeperWatcher();
   }

-  private class FileLockCallable implements Callable<FSDataOutputStream> {
-    RetryCounter retryCounter;
-
-    public FileLockCallable(RetryCounter retryCounter) {
-      this.retryCounter = retryCounter;
-    }
-    @Override
-    public FSDataOutputStream call() throws IOException {
-      try {
-        FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
-        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
-            HConstants.DATA_FILE_UMASK_KEY);
-        Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
-        fs.mkdirs(tmpDir);
-        HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
-        final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
-        out.writeBytes(InetAddress.getLocalHost().toString());
-        out.flush();
-        return out;
-      } catch(RemoteException e) {
-        if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
-          return null;
-        } else {
-          throw e;
-        }
-      }
-    }
-
-    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
-        final Path hbckLockFilePath, final FsPermission defaultPerms)
-        throws IOException {
-
-      IOException exception = null;
-      do {
-        try {
-          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
-        } catch (IOException ioe) {
-          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
-              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
-              + retryCounter.getMaxAttempts());
-          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
-              ioe);
-          try {
-            exception = ioe;
-            retryCounter.sleepUntilNextRetry();
-          } catch (InterruptedException ie) {
-            throw (InterruptedIOException) new InterruptedIOException(
-                "Can't create lock file " + hbckLockFilePath.getName())
-                .initCause(ie);
-          }
-        }
-      } while (retryCounter.shouldRetry());
-
-      throw exception;
-    }
-  }
-
-  /**
-   * This method maintains a lock using a file. If the creation fails we return null
-   *
-   * @return FSDataOutputStream object corresponding to the newly opened lock file
-   * @throws IOException if IO failure occurs
-   */
-  private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
-    RetryCounter retryCounter = lockFileRetryCounterFactory.create();
-    FileLockCallable callable = new FileLockCallable(retryCounter);
-    ExecutorService executor = Executors.newFixedThreadPool(1);
-    FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
-    executor.execute(futureTask);
-    final int timeoutInSeconds = getConf().getInt(
-      "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
-    FSDataOutputStream stream = null;
+  public void connect() {
     try {
-      stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
-    } catch (ExecutionException ee) {
-      LOG.warn("Encountered exception when opening lock file", ee);
-    } catch (InterruptedException ie) {
-      LOG.warn("Interrupted when opening lock file", ie);
-      Thread.currentThread().interrupt();
-    } catch (TimeoutException exception) {
-      // took too long to obtain lock
-      LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
-      futureTask.cancel(true);
-    } finally {
-      executor.shutdownNow();
-    }
-    return stream;
-  }
-
-  private void unlockHbck() {
-    if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
-      RetryCounter retryCounter = lockFileRetryCounterFactory.create();
-      do {
-        try {
-          IOUtils.closeQuietly(hbckOutFd);
-          FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
-              HBCK_LOCK_PATH, true);
-          LOG.info("Finishing hbck");
-          return;
-        } catch (IOException ioe) {
-          LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
-              + (retryCounter.getAttemptTimes() + 1) + " of "
-              + retryCounter.getMaxAttempts());
-          LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
-          try {
-            retryCounter.sleepUntilNextRetry();
-          } catch (InterruptedException ie) {
-            Thread.currentThread().interrupt();
-            LOG.warn("Interrupted while deleting lock file" +
-                HBCK_LOCK_PATH);
-            return;
-          }
+      HBaseFsckUtil.init(getConf(), this, isExclusive(), summary, new Thread() {
+        // Add a shutdown hook to this thread, in case user tries to
+        // kill the hbck with a ctrl-c, we want to cleanup the hbck znode so
+        // it is available for further calls
+        @Override
+        public void run() {
+          cleanupHbckZnode();
         }
-      } while (retryCounter.shouldRetry());
+      });
+      connection = HBaseFsckUtil.getConnection();
+      admin = HBaseFsckUtil.getAdmin();
+      status = HBaseFsckUtil.getStatus();
+      meta = HBaseFsckUtil.getMeta();
+    } catch (IOException|ClassNotFoundException e) {
+      LOG.error("Failed to connect to master. ", e);
+      setRetCode(-1);
     }
   }

-  /**
-   * To repair region consistency, one must call connect() in order to repair
-   * online state.
-   */
-  public void connect() throws IOException {
-
-    if (isExclusive()) {
-      // Grab the lock
-      hbckOutFd = checkAndMarkRunningHbck();
-      if (hbckOutFd == null) {
-        setRetCode(-1);
-        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
-            "[If you are sure no other instance is running, delete the lock file " +
-            HBCK_LOCK_PATH + " and rerun the tool]");
-        throw new IOException("Duplicate hbck - Abort");
-      }
-
-      // Make sure to cleanup the lock
-      hbckLockCleanup.set(true);
-    }
-
-
-    // Add a shutdown hook to this thread, in case user tries to
-    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
-    // it is available for further calls
-    Runtime.getRuntime().addShutdownHook(new Thread() {
-      @Override
-      public void run() {
-        IOUtils.closeQuietly(HBaseFsck.this);
-        cleanupHbckZnode();
-        unlockHbck();
-      }
-    });
-
-    LOG.info("Launching hbck");
-
-    connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
-    admin = connection.getAdmin();
-    meta = connection.getTable(TableName.META_TABLE_NAME);
-    status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS,
-      Option.DEAD_SERVERS, Option.MASTER, Option.BACKUP_MASTERS,
-      Option.REGIONS_IN_TRANSITION, Option.HBASE_VERSION));
-  }
-
   /**
    * Get deployed regions according to the region servers.
    */
   private void loadDeployedRegions() throws IOException, InterruptedException {
     // From the master, get a list of all known live region servers
     Collection<ServerName> regionServers = status.getLiveServerMetrics().keySet();
-    errors.print("Number of live region servers: " + regionServers.size());
+    reporter.details("Number of live region servers: " + regionServers.size());
     if (details) {
       for (ServerName rsinfo: regionServers) {
-        errors.print("  " + rsinfo.getServerName());
+        reporter.details("  " + rsinfo.getServerName());
       }
     }

     // From the master, get a list of all dead region servers
     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
-    errors.print("Number of dead region servers: " + deadRegionServers.size());
+    reporter.details("Number of dead region servers: " + deadRegionServers.size());
     if (details) {
       for (ServerName name: deadRegionServers) {
-        errors.print("  " + name);
+        reporter.details("  " + name);
       }
     }

     // Print the current master name and state
-    errors.print("Master: " + status.getMasterName());
+    reporter.details("Master: " + status.getMasterName());

     // Print the list of all backup masters
     Collection<ServerName> backupMasters = status.getBackupMasterNames();
-    errors.print("Number of backup masters: " + backupMasters.size());
+    reporter.details("Number of backup masters: " + backupMasters.size());
     if (details) {
       for (ServerName name: backupMasters) {
-        errors.print("  " + name);
+        reporter.details("  " + name);
       }
     }

-    errors.print("Average load: " + status.getAverageLoad());
-    errors.print("Number of requests: " + status.getRequestCount());
-    errors.print("Number of regions: " + status.getRegionCount());
+    reporter.details("Average load: " + status.getAverageLoad());
+    reporter.details("Number of requests: " + status.getRequestCount());
+    reporter.details("Number of regions: " + status.getRegionCount());

     List<RegionState> rits = status.getRegionStatesInTransition();
-    errors.print("Number of regions in transition: " + rits.size());
+    reporter.details("Number of regions in transition: " + rits.size());
     if (details) {
       for (RegionState state: rits) {
-        errors.print("  " + state.toDescriptiveString());
+        reporter.details("  " + state.toDescriptiveString());
       }
     }
@@ -606,7 +448,7 @@
     regionInfoMap.clear();
     emptyRegionInfoQualifiers.clear();
     tableStates.clear();
-    errors.clear();
+    reporter.clear();
     tablesInfo.clear();
     orphanHdfsDirs.clear();
     skippedRegions.clear();
@@ -668,7 +510,7 @@
       } else {
         errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
       }
-      errors.reportError(errorMsg + " Exiting...");
+      reporter.error(errorMsg + " Exiting...");
       return -2;
     }
     // Not going with further consistency check for tables when hbase:meta itself is not consistent.
@@ -709,7 +551,7 @@
     // Check integrity (does not fix)
     checkIntegrity();
-    return errors.getErrorList().size();
+    return reporter.getErrorList().size();
   }

   /**
@@ -771,7 +613,7 @@
   public int onlineHbck()
       throws IOException, KeeperException, InterruptedException, ReplicationException {
     // print hbase server version
-    errors.print("Version: " + status.getHBaseVersion());
+    reporter.details("Version: " + status.getHBaseVersion());

     // Clean start
     clearState();
@@ -802,11 +644,11 @@
     cleanupHbckZnode();

     // Remove the hbck lock
-    unlockHbck();
+    HBaseFsckUtil.unlockHbck();

     // Print table summary
     printTableSummary(tablesInfo);
-    return errors.summarize();
+    return reporter.summarize();
   }

   public static byte[] keyOnly (byte[] b) {
@@ -822,7 +664,6 @@
   public void close() throws IOException {
     try {
       cleanupHbckZnode();
-      unlockHbck();
     } catch (Exception io) {
       LOG.warn(io.toString(), io);
     } finally {
@@ -830,9 +671,6 @@
         zkw.close();
         zkw = null;
       }
-      IOUtils.closeQuietly(admin);
-      IOUtils.closeQuietly(meta);
-      IOUtils.closeQuietly(connection);
     }
   }

@@ -923,8 +761,8 @@
           currentRegionBoundariesInformation.metaLastKey) < 0;
     }
     if (!valid) {
-      errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
-          tablesInfo.get(regionInfo.getTable()));
+      reporter.error(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
+          regionInfo.getTable());
       LOG.warn("Region's boundaries not aligned between stores and META for:");
       LOG.warn(Objects.toString(currentRegionBoundariesInformation));
     }
@@ -1053,12 +891,12 @@
     LOG.info("Loading HBase regioninfo from HDFS...");
     loadHdfsRegionDirs(); // populating regioninfo table.

-    int errs = errors.getErrorList().size();
+    int errs = reporter.getErrorList().size();
     // First time just get suggestions.
     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
     checkHdfsIntegrity(false, false);

-    if (errors.getErrorList().size() == errs) {
+    if (reporter.getErrorList().size() == errs) {
       LOG.info("No integrity errors. We are done with this phase. Glorious.");
       return 0;
     }
@@ -1085,7 +923,7 @@
       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
     }

-    return errors.getErrorList().size();
+    return reporter.getErrorList().size();
   }

   /**
@@ -1103,15 +941,15 @@
     FileSystem fs = hbaseRoot.getFileSystem(conf);
     LOG.info("Computing mapping of all store files");
     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
-        new FSUtils.ReferenceFileFilter(fs), executor, errors);
-    errors.print("");
+        new FSUtils.ReferenceFileFilter(fs), executor, reporter);
+    reporter.details("");
     LOG.info("Validating mapping using HDFS state");
     for (Path path: allFiles.values()) {
       Path referredToFile = StoreFileInfo.getReferredToFile(path);
       if (fs.exists(referredToFile)) continue; // good, expected

       // Found a lingering reference file
-      errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
+      reporter.error(ERROR_CODE.LINGERING_REFERENCE_HFILE,
           "Found lingering reference file " + path);
       if (!shouldFixReferenceFiles()) continue;
@@ -1156,8 +994,8 @@
     FileSystem fs = hbaseRoot.getFileSystem(conf);
     LOG.info("Computing mapping of all link files");
     Map<String, Path> allFiles = FSUtils
-        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
-    errors.print("");
+        .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, reporter);
+    reporter.details("");
     LOG.info("Validating mapping using HDFS state");

     for (Path path : allFiles.values()) {
@@ -1166,7 +1004,7 @@
       if (actualLink.exists(fs)) continue; // good, expected

       // Found a lingering HFileLink
-      errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
+      reporter.error(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
       if (!shouldFixHFileLinks()) continue;

       // Now, trying to fix it since requested
@@ -1185,10 +1023,10 @@
       // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
       // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
       Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
-          .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
-              HFileLink.getReferencedRegionName(path.getName().toString()),
+          .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName()),
+              HFileLink.getReferencedRegionName(path.getName()),
               path.getParent().getName()),
-          HFileLink.getReferencedHFileName(path.getName().toString()));
+          HFileLink.getReferencedHFileName(path.getName()));
       success = sidelineFile(fs, hbaseRoot, backRefPath);

       if (!success) {
@@ -1216,11 +1054,11 @@
    * TODO -- need to add tests for this.
    */
   private void reportEmptyMetaCells() {
-    errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
+    reporter.details("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
       emptyRegionInfoQualifiers.size());
     if (details) {
       for (Result r: emptyRegionInfoQualifiers) {
-        errors.print("  " + r);
+        reporter.details("  " + r);
       }
     }
   }
@@ -1231,13 +1069,13 @@
   private void reportTablesInFlux() {
     AtomicInteger numSkipped = new AtomicInteger(0);
     TableDescriptor[] allTables = getTables(numSkipped);
-    errors.print("Number of Tables: " + allTables.length);
+    reporter.details("Number of Tables: " + allTables.length);
     if (details) {
       if (numSkipped.get() > 0) {
-        errors.detail("Number of Tables in flux: " + numSkipped.get());
+        reporter.details("Number of Tables in flux: " + numSkipped.get());
       }
       for (TableDescriptor td : allTables) {
-        errors.detail("  Table: " + td.getTableName() + "\t" +
+        reporter.details("  Table: " + td.getTableName() + "\t" +
                  (td.isReadOnly() ? "ro" : "rw") + "\t" +
                  (td.isMetaRegion() ? "META" : "   ") + "\t" +
                  " families: " + td.getColumnFamilyCount());
@@ -1245,8 +1083,8 @@
     }
   }

-  public ErrorReporter getErrors() {
-    return errors;
+  public HBaseFsckUtil.Reporter getReporter() {
+    return reporter;
   }

   /**
@@ -1301,7 +1139,7 @@
     List<Future<Void>> hbiFutures;

     for (HbckInfo hbi : hbckInfos) {
-      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
+      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, reporter);
       hbis.add(work);
     }
@@ -1351,7 +1189,7 @@
         if (!orphanTableDirs.containsKey(tableName)) {
           LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
           //should only report once for each table
-          errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
+          reporter.error(ERROR_CODE.NO_TABLEINFO_FILE,
               "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
           Set<String> columns = new HashSet<>();
           orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
@@ -1364,7 +1202,7 @@
     }

     loadTableInfosForTablesWithNoRegion();
-    errors.print("");
+    reporter.details("");
     return tablesInfo;
   }
@@ -1414,7 +1252,7 @@
       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
       for (Result region : emptyRegionInfoQualifiers) {
         deleteMetaRegion(region.getRow());
-        errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
+        reporter.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
       }
       emptyRegionInfoQualifiers.clear();
     }
@@ -1556,7 +1394,7 @@
       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
     logParallelMerge();
     for (TableInfo tInfo : tablesInfo.values()) {
-      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, reporter);
       tInfo.checkRegionChain(handler);
     }
   }
@@ -1578,21 +1416,21 @@
     LOG.info("Loading HBase regioninfo from HDFS...");
     loadHdfsRegionDirs(); // populating regioninfo table.

-    int errs = errors.getErrorList().size();
+    int errs = reporter.getErrorList().size();
     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
     checkHdfsIntegrity(false, false);

     // make sure ok.
-    if (errors.getErrorList().size() != errs) {
+    if (reporter.getErrorList().size() != errs) {
       // While in error state, iterate until no more fixes possible
       while(true) {
         fixes = 0;
         suggestFixes(tablesInfo);
-        errors.clear();
+        reporter.clear();
         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());

-        int errCount = errors.getErrorList().size();
+        int errCount = reporter.getErrorList().size();

         if (fixes == 0) {
           if (errCount > 0) {
@@ -1669,14 +1507,14 @@
     for (TableInfo tInfo : tablesInfo.values()) {
       TableIntegrityErrorHandler handler;
       if (fixHoles || fixOverlaps) {
-        handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
+        handler = tInfo.new HDFSIntegrityFixer(tInfo, reporter, getConf(),
           fixHoles, fixOverlaps);
       } else {
-        handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+        handler = tInfo.new IntegrityFixSuggester(tInfo, reporter);
       }
       if (!tInfo.checkRegionChain(handler)) {
         // should dump info as well.
-        errors.report("Found inconsistency in table " + tInfo.getName());
+        reporter.report("Found inconsistency in table " + tInfo.getName());
       }
     }
     return tablesInfo;
@@ -1867,7 +1705,7 @@
     // verify that version file exists
     if (!foundVersionFile) {
-      errors.reportError(ERROR_CODE.NO_VERSION_FILE,
+      reporter.error(ERROR_CODE.NO_VERSION_FILE,
           "Version file does not exist in root dir " + rootDir);
       if (shouldFixVersionFile()) {
         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
@@ -1890,7 +1728,7 @@
     // for the region-level callables to be serviced.
     for (FileStatus tableDir : tableDirs) {
       LOG.debug("Loading region dirs from " +tableDir.getPath());
-      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
+      WorkItemHdfsDir item = new WorkItemHdfsDir(fs, reporter, tableDir);
       try {
         item.call();
       } catch (ExecutionException e) {
         LOG.warn("Could not completely load table dir " +
             tableDir.getPath(), e.getCause());
       }
     }
-    errors.print("");
+    reporter.details("");
   }

   /**
@@ -1908,25 +1746,21 @@
     RegionLocations rl = connection.locateRegion(TableName.META_TABLE_NAME,
         HConstants.EMPTY_START_ROW, false, false);
     if (rl == null) {
-      errors.reportError(ERROR_CODE.NULL_META_REGION,
-          "META region was not found in ZooKeeper");
+      reporter.error(ERROR_CODE.NULL_META_REGION, "META region was not found in ZooKeeper");
       return false;
     }
     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
       // Check if Meta region is valid and existing
       if (metaLocation == null ) {
-        errors.reportError(ERROR_CODE.NULL_META_REGION,
-            "META region location is null");
+        reporter.error(ERROR_CODE.NULL_META_REGION, "META region location is null");
         return false;
       }
       if (metaLocation.getRegionInfo() == null) {
-        errors.reportError(ERROR_CODE.NULL_META_REGION,
-            "META location regionInfo is null");
+        reporter.error(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null");
         return false;
       }
       if (metaLocation.getHostname() == null) {
-        errors.reportError(ERROR_CODE.NULL_META_REGION,
-            "META location hostName is null");
+        reporter.error(ERROR_CODE.NULL_META_REGION, "META location hostName is null");
         return false;
       }
       ServerName sn = metaLocation.getServerName();
@@ -1975,7 +1809,7 @@
     // loop to contact each region server in parallel
     for (ServerName rsinfo: regionServerList) {
-      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
+      workItems.add(new WorkItemRegion(this, rsinfo, reporter, connection));
     }

     workFutures = executor.invokeAll(workItems);
@@ -2121,13 +1955,13 @@
         MetaTableAccessor.deleteTableState(connection, tableName);
         TableState state = MetaTableAccessor.getTableState(connection, tableName);
         if (state != null) {
-          errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
+          reporter.error(ERROR_CODE.ORPHAN_TABLE_STATE,
               tableName + " unable to delete dangling table state " + tableState);
         }
       } else if (!checkMetaOnly) {
         // dangling table state in meta if checkMetaOnly is false. If checkMetaOnly is
         // true, tableInfo will be null as tablesInfo are not polulated for all tables from hdfs
-        errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE,
+        reporter.error(ERROR_CODE.ORPHAN_TABLE_STATE,
             tableName + " has dangling table state " + tableState);
       }
     }
@@ -2139,11 +1973,11 @@
         MetaTableAccessor.updateTableState(connection, tableName, TableState.State.ENABLED);
         TableState newState = MetaTableAccessor.getTableState(connection, tableName);
         if (newState == null) {
-          errors.reportError(ERROR_CODE.NO_TABLE_STATE,
+          reporter.error(ERROR_CODE.NO_TABLE_STATE,
               "Unable to change state for table " + tableName + " in meta ");
         }
       } else {
-        errors.reportError(ERROR_CODE.NO_TABLE_STATE,
+        reporter.error(ERROR_CODE.NO_TABLE_STATE,
             tableName + " has no state in meta ");
       }
     }
@@ -2165,7 +1999,7 @@
         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
       } catch (AccessDeniedException ace) {
         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
-        errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
+        reporter.error(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
            + " does not have write perms to " + file.getPath()
            + ". Please rerun hbck as hdfs user " + file.getOwner());
         throw ace;
@@ -2322,7 +2156,7 @@
     for (HRegionLocation h : rl.getRegionLocations()) {
       ServerName serverName = h.getServerName();
       if (serverName == null) {
-        errors.reportError("Unable to close region "
+        reporter.error("Unable to close region "
             + hi.getRegionNameAsString() +  " because meta does not "
             + "have handle to reach it.");
         continue;
@@ -2345,7 +2179,7 @@
     KeeperException, InterruptedException {
     // If we are trying to fix the errors
     if (shouldFixAssignments()) {
-      errors.print(msg);
+      reporter.details(msg);
       undeployRegions(hbi);
       setShouldRerun();
       RegionInfo hri = hbi.getHdfsHRI();
@@ -2416,7 +2250,7 @@
       // We shouldn't have record of this region at all then!
       assert false : "Entry for region with no data";
     } else if (!inMeta && !inHdfs && isDeployed) {
-      errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
+      reporter.error(ERROR_CODE.NOT_IN_META_HDFS, "Region "
           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
       if (shouldFixAssignments()) {
@@ -2432,7 +2266,7 @@
             + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
         return;
       }
-      errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
+      reporter.error(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
          + descriptiveName + " on HDFS, but not listed in hbase:meta " +
          "or deployed on any region server");
       // restore region consistency of an adopted orphan
@@ -2483,7 +2317,7 @@
         }
       }
     } else if (!inMeta && inHdfs && isDeployed) {
-      errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
+      reporter.error(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
          + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
       debugLsr(hbi.getHdfsRegionDir());
       if (hbi.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
@@ -2533,7 +2367,7 @@
         return;
       }

-      errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
+      reporter.error(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
           + descriptiveName + " is a split parent in META, in HDFS, "
           + "and not deployed on any region server. This could be transient, "
           + "consider to run the catalog janitor first!");
@@ -2542,21 +2376,21 @@
         resetSplitParent(hbi);
       }
     } else if (inMeta && !inHdfs && !isDeployed) {
-      errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
+      reporter.error(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
          + descriptiveName + " found in META, but not in HDFS "
          + "or deployed on any region server.");
       if (shouldFixMeta()) {
         deleteMetaRegion(hbi);
       }
     } else if (inMeta && !inHdfs && isDeployed) {
-      errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
+      reporter.error(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
          + " found in META, but not in HDFS, " +
          "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
       // We treat HDFS as ground truth. Any information in meta is transient
       // and equivalent data can be regenerated. So, lets unassign and remove
       // these problems from META.
       if (shouldFixAssignments()) {
-        errors.print("Trying to fix unassigned region...");
+        reporter.details("Trying to fix unassigned region...");
         undeployRegions(hbi);
       }
       if (shouldFixMeta()) {
@@ -2564,43 +2398,43 @@
         deleteMetaRegion(hbi);
       }
     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
-      errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
+      reporter.error(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
           + " not deployed on any region server.");
       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
-      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
+      reporter.error(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
           "Region " + descriptiveName + " should not be deployed according " +
           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
       if (shouldFixAssignments()) {
-        errors.print("Trying to close the region " + descriptiveName);
+        reporter.details("Trying to close the region " + descriptiveName);
         setShouldRerun();
         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
       }
     } else if (inMeta && inHdfs && isMultiplyDeployed) {
-      errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
+      reporter.error(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
          + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
          + " but is multiply assigned to region servers " +
          Joiner.on(", ").join(hbi.deployedOn));
       // If we are trying to fix the errors
       if (shouldFixAssignments()) {
-        errors.print("Trying to fix assignment error...");
+        reporter.details("Trying to fix assignment error...");
         setShouldRerun();
         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
       }
     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
-      errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
+      reporter.error(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
          + descriptiveName + " listed in hbase:meta on region server " +
          hbi.metaEntry.regionServer + " but found on region server " +
          hbi.deployedOn.get(0));
       // If we are trying to fix the errors
       if (shouldFixAssignments()) {
-        errors.print("Trying to fix assignment error...");
+        reporter.details("Trying to fix assignment error...");
         setShouldRerun();
         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
       }
     } else {
-      errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
+      reporter.error(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
           " is in an unforeseen state:" +
           " inMeta=" + inMeta +
           " inHdfs=" + inHdfs +
@@ -2626,22 +2460,22 @@
         // this assumes that consistency check has run loadMetaEntry
         Path p = hbi.getHdfsRegionDir();
         if (p == null) {
-          errors.report("No regioninfo in Meta or HDFS. " + hbi);
+          reporter.report("No regioninfo in Meta or HDFS. " + hbi);
         }

        // TODO test.
        continue;
      }
      if (hbi.metaEntry.regionServer == null) {
-        errors.detail("Skipping region because no region server: " + hbi);
+        reporter.details("Skipping region because no region server: " + hbi);
        continue;
      }
      if (hbi.metaEntry.isOffline()) {
-        errors.detail("Skipping region because it is offline: " + hbi);
+        reporter.details("Skipping region because it is offline: " + hbi);
        continue;
      }
      if (hbi.containsOnlyHdfsEdits()) {
-        errors.detail("Skipping region because it only contains edits" + hbi);
+        reporter.details("Skipping region because it only contains edits" + hbi);
        continue;
      }
@@ -2673,9 +2507,9 @@
    logParallelMerge();
    for (TableInfo tInfo : tablesInfo.values()) {
-      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
+      TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, reporter);
      if (!tInfo.checkRegionChain(handler)) {
-        errors.report("Found inconsistency in table " + tInfo.getName());
+        reporter.report("Found inconsistency in table " + tInfo.getName());
      }
    }
    return tablesInfo;
@@ -2846,12 +2680,11 @@
      // if not the absolute end key, check for cycle
      if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
-        errors.reportError(
-            ERROR_CODE.REGION_CYCLE,
-            String.format("The endkey for this region comes before the "
-                + "startkey, startkey=%s, endkey=%s",
-                Bytes.toStringBinary(hir.getStartKey()),
-                Bytes.toStringBinary(hir.getEndKey())), this, hir);
+        reporter.error(ERROR_CODE.REGION_CYCLE,
+            String.format("The endkey for this region comes before the "
+                + "startkey, startkey=%s, endkey=%s",
+                Bytes.toStringBinary(hir.getStartKey()), Bytes.toStringBinary(hir.getEndKey())),
+            this.getName(), hir.getRegionNameAsString());
        backwards.add(hir);
        return;
      }
@@ -2891,68 +2724,66 @@
    }

    private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
-      ErrorReporter errors;
+      HBaseFsckUtil.Reporter reporter;

-      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
-        this.errors = errors;
+      IntegrityFixSuggester(TableInfo ti, HBaseFsckUtil.Reporter reporter) {
+        this.reporter = reporter;
        setTableInfo(ti);
      }

      @Override
      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
-        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
+        reporter.error(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
            "First region should start with an empty key.  You need to "
            + " create a new region and regioninfo in HDFS to plug the hole.",
-            getTableInfo(), hi);
+            getTableInfo().getName(), hi.getRegionNameAsString());
      }

      @Override
      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
-        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
+        reporter.error(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
            "Last region should end with an empty key. You need to "
-                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
+                + "create a new region and regioninfo in HDFS to plug the hole.",
+            getTableInfo().getName());
      }

      @Override
      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
-        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
-            "Region has the same start and end key.", getTableInfo(), hi);
+        reporter.error(ERROR_CODE.DEGENERATE_REGION,
+            "Region has the same start and end key.", getTableInfo().getName(),
+            hi.getRegionNameAsString());
      }

      @Override
      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
        byte[] key = r1.getStartKey();
        // dup start key
-        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
-            "Multiple regions have the same startkey: "
-            + Bytes.toStringBinary(key), getTableInfo(), r1);
-        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
-            "Multiple regions have the same startkey: "
-            + Bytes.toStringBinary(key), getTableInfo(), r2);
+        reporter.error(ERROR_CODE.DUPE_STARTKEYS, "Multiple regions have the same startkey: "
+            + Bytes.toStringBinary(key), getTableInfo().getName(), r1.getRegionNameAsString());
+        reporter.error(ERROR_CODE.DUPE_STARTKEYS, "Multiple regions have the same startkey: "
+            + Bytes.toStringBinary(key), getTableInfo().getName(), r2.getRegionNameAsString());
      }

      @Override
      public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException{
        byte[] key = r1.getStartKey();
        // dup start key
-        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
-            "Multiple regions have the same regionID: "
-            + Bytes.toStringBinary(key), getTableInfo(), r1);
-        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
-            "Multiple regions have the same regionID: "
-            + Bytes.toStringBinary(key), getTableInfo(), r2);
+        reporter.error(ERROR_CODE.DUPE_ENDKEYS, "Multiple regions have the same regionID: "
+            + Bytes.toStringBinary(key), getTableInfo().getName(), r1.getRegionNameAsString());
+        reporter.error(ERROR_CODE.DUPE_ENDKEYS, "Multiple regions have the same regionID: "
+            + Bytes.toStringBinary(key), getTableInfo().getName(), r2.getRegionNameAsString());
      }

      @Override
      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
-        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
+        reporter.error(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
            "There is an overlap in the region chain.",
-            getTableInfo(), hi1, hi2);
+            getTableInfo().getName(), hi1.getRegionNameAsString(), hi2.getRegionNameAsString());
      }

      @Override
      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
-        errors.reportError(
+        reporter.error(
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
                "There is a hole in the region chain between "
                + Bytes.toStringBinary(holeStart) + " and "
@@ -2979,9 +2810,9 @@
      boolean fixOverlaps = true;

-      HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
+      HDFSIntegrityFixer(TableInfo ti, HBaseFsckUtil.Reporter reporter, Configuration conf,
          boolean fixHoles, boolean fixOverlaps) {
-        super(ti, errors);
+        super(ti, reporter);
        this.conf = conf;
        this.fixOverlaps = fixOverlaps;
        // TODO properly use fixHoles
@@ -2994,10 +2825,10 @@
       */
      @Override
      public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
-        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
+        reporter.error(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
            "First region should start with an empty key. Creating a new "
            + "region and regioninfo in HDFS to plug the hole.",
-            getTableInfo(), next);
+            getTableInfo().getName(), next.getRegionNameAsString());
        TableDescriptor htd = getTableInfo().getHTD();
        // from special EMPTY_START_ROW to next region's startKey
        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
@@ -3014,9 +2845,9 @@
      @Override
      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
-        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
+        reporter.error(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
            "Last region should end with an empty key. Creating a new "
-                + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
+                + "region and regioninfo in HDFS to plug the hole.", getTableInfo().getName());
        TableDescriptor htd = getTableInfo().getHTD();
        // from curEndKey to EMPTY_START_ROW
        RegionInfo newRegion = RegionInfoBuilder.newBuilder(htd.getTableName())
@@ -3036,7 +2867,7 @@
       */
      @Override
      public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
-        errors.reportError(
+        reporter.error(
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
            "There is a hole in the region chain between "
                + Bytes.toStringBinary(holeStartKey) + " and "
@@ -3335,7 +3166,7 @@
      if (isTableDisabled(this.tableName)) {
        return true;
      }
-      int originalErrorsCount = errors.getErrorList().size();
+      int originalErrorsCount = reporter.getErrorList().size();
      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
      SortedSet<byte[]> splits = sc.getSplits();
@@ -3439,23 +3270,23 @@
      if (details) {
        // do full region split map dump
-        errors.print("---- Table '" +  this.tableName
+        reporter.details("---- Table '" +  this.tableName
            + "': region split map");
        dump(splits, regions);
-        errors.print("---- Table '" +  this.tableName
+        reporter.details("---- Table '" +  this.tableName
            + "': overlap groups");
        dumpOverlapProblems(overlapGroups);
-        errors.print("There are " + overlapGroups.keySet().size()
+        reporter.details("There are " + overlapGroups.keySet().size()
            + " overlap groups with " + overlapGroups.size()
            + " overlapping regions");
      }
      if (!sidelinedRegions.isEmpty()) {
        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
-        errors.print("---- Table '" +  this.tableName
+        reporter.details("---- Table '" +  this.tableName
            + "': sidelined big overlapped regions");
        dumpSidelinedRegions(sidelinedRegions);
      }
-      return errors.getErrorList().size() == originalErrorsCount;
+      return reporter.getErrorList().size() == originalErrorsCount;
    }

    private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
@@ -3505,7 +3336,7 @@
          sb.append("[ "+ r.toString() + ", "
              + Bytes.toStringBinary(r.getEndKey())+ "]\t");
        }
-        errors.print(sb.toString());
+        reporter.details(sb.toString());
      }
    }
  }
@@ -3514,12 +3345,12 @@
    // we display this way because the last end key should be displayed as
    // well.
    for (byte[] k : regions.keySet()) {
-      errors.print(Bytes.toStringBinary(k) + ":");
+      reporter.details(Bytes.toStringBinary(k) + ":");
      for (HbckInfo r : regions.get(k)) {
-        errors.print("[ " + r.toString() + ", "
+        reporter.details("[ " + r.toString() + ", "
            + Bytes.toStringBinary(r.getEndKey()) + "]");
      }
-      errors.print("----");
+      reporter.details("----");
    }
  }
@@ -3527,9 +3358,9 @@
    for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
      TableName tableName = entry.getValue().getTableName();
      Path path = entry.getKey();
-      errors.print("This sidelined region dir should be bulk loaded: "
+      reporter.details("This sidelined region dir should be bulk loaded: "
          + path.toString());
-      errors.print("Bulk load command looks like: "
+      reporter.details("Bulk load command looks like: "
          + "hbase org.apache.hadoop.hbase.tool.LoadIncrementalHFiles "
          + path.toUri().getPath() + " "+ tableName);
    }
@@ -3597,7 +3428,7 @@
  }

  private void checkAndFixReplication() throws ReplicationException {
-    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, errors);
+    ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, reporter);
    checker.checkUnDeletedQueues();

    if (checker.hasUnDeletedQueues() && this.fixReplication) {
@@ -3638,11 +3469,11 @@
      if (servers.isEmpty()) {
        assignMetaReplica(i);
      } else if (servers.size() > 1) {
-        errors
-            .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
+        reporter
+            .error(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
                       metaHbckInfo.getReplicaId() + " is found on more than one region.");
        if (shouldFixAssignments()) {
-          errors.print("Trying to fix a problem with hbase:meta, replicaId " +
+          reporter.details("Trying to fix a problem with hbase:meta, replicaId " +
                       metaHbckInfo.getReplicaId() +"..");
          setShouldRerun();
          // try fix it (treat is a dupe assignment)
@@ -3654,11 +3485,11 @@
      // unassign whatever is remaining in metaRegions. They are excess replicas.
      for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
        noProblem = false;
-        errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
+        reporter.error(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
            "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
            ", deployed " + metaRegions.size());
        if (shouldFixAssignments()) {
-          errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
+          reporter.details("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
              " of hbase:meta..");
          setShouldRerun();
          unassignMetaReplica(entry.getValue());
@@ -3677,10 +3508,10 @@
  private void assignMetaReplica(int replicaId)
      throws IOException, KeeperException, InterruptedException {
-    errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
+    reporter.error(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
        replicaId +" is not found on any region.");
    if (shouldFixAssignments()) {
-      errors.print("Trying to fix a problem with hbase:meta..");
+      reporter.error("Trying to fix a problem with hbase:meta..");
      setShouldRerun();
      // try to fix it (treat it as unassigned region)
      RegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
@@ -3715,7 +3546,7 @@
        RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
        if (rl == null) {
          emptyRegionInfoQualifiers.add(result);
-          errors.reportError(ERROR_CODE.EMPTY_META_CELL,
+          reporter.error(ERROR_CODE.EMPTY_META_CELL,
            "Empty REGIONINFO_QUALIFIER found in hbase:meta");
          return true;
        }
@@ -3723,7 +3554,7 @@
        if (rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID) == null ||
            rl.getRegionLocation(RegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
          emptyRegionInfoQualifiers.add(result);
-          errors.reportError(ERROR_CODE.EMPTY_META_CELL,
+          reporter.error(ERROR_CODE.EMPTY_META_CELL,
            "Empty REGIONINFO_QUALIFIER found in hbase:meta");
          return true;
        }
@@ -3767,7 +3598,7 @@
        // show proof of progress to the user, once for every 100 records.
        if (countRecord % 100 == 0) {
-          errors.progress();
+          reporter.progress();
        }
        countRecord++;
        return true;
@@ -3782,7 +3613,7 @@
      MetaTableAccessor.fullScanRegions(connection, visitor);
    }

-    errors.print("");
+    reporter.details("");
    return true;
  }
@@ -4088,21 +3919,21 @@
  private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
    StringBuilder sb = new StringBuilder();
    int numOfSkippedRegions;
-    errors.print("Summary:");
+    reporter.details("Summary:");
    for (TableInfo tInfo : tablesInfo.values()) {
      numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
          skippedRegions.get(tInfo.getName()).size() : 0;
-      if (errors.tableHasErrors(tInfo)) {
-        errors.print("Table " + tInfo.getName() + " is inconsistent.");
+      if (reporter.tableHasErrors(tInfo.getName())) {
+        reporter.details("Table " + tInfo.getName() + " is inconsistent.");
       } else if (numOfSkippedRegions > 0){
-        errors.print("Table " + tInfo.getName() + " is okay (with "
+        reporter.details("Table " + tInfo.getName() + " is okay (with "
             + numOfSkippedRegions + " skipped regions).");
       } else {
-        errors.print("Table " + tInfo.getName() + " is okay.");
+        reporter.details("Table " + tInfo.getName() + " is okay.");
       }
-      errors.print("    Number of regions: " + tInfo.getNumRegions());
+      reporter.details("    Number of regions: " + tInfo.getNumRegions());
       if (numOfSkippedRegions > 0) {
         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
@@ -4116,175 +3947,7 @@ public class HBaseFsck extends Configured implements Closeable {
       for (ServerName server : tInfo.deployedOn) {
         sb.append(" " + server.toString());
       }
-      errors.print(sb.toString());
-    }
-  }
-
-  static ErrorReporter getErrorReporter(
-      final Configuration conf) throws ClassNotFoundException {
-    Class<? extends ErrorReporter> reporter =
-        conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
-    return ReflectionUtils.newInstance(reporter, conf);
-  }
-
-  public interface ErrorReporter {
-    enum ERROR_CODE {
-      UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
-      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META,
-      NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
-      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
-      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
-      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
-      LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR,
-      ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE, DUPE_ENDKEYS,
-      UNSUPPORTED_OPTION, INVALID_TABLE
-    }
-    void clear();
-    void report(String message);
-    void reportError(String message);
-    void reportError(ERROR_CODE errorCode, String message);
-    void reportError(ERROR_CODE errorCode, String message, TableInfo table);
-    void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
-    void reportError(
-      ERROR_CODE errorCode,
-      String message,
-      TableInfo table,
-      HbckInfo info1,
-      HbckInfo info2
-    );
-    int summarize();
-    void detail(String details);
-    ArrayList<ERROR_CODE> getErrorList();
-    void progress();
-    void print(String message);
-    void resetErrors();
-    boolean tableHasErrors(TableInfo table);
-  }
-
-  static class PrintingErrorReporter implements ErrorReporter {
-    public int errorCount = 0;
-    private int showProgress;
-    // How frequently calls to progress() will create output
-    private static final int progressThreshold = 100;
-
-    Set<TableInfo> errorTables = new HashSet<>();
-
-    // for use by unit tests to verify which errors were discovered
-    private ArrayList<ERROR_CODE> errorList = new ArrayList<>();
-
-    @Override
-    public void clear() {
-      errorTables.clear();
-      errorList.clear();
-      errorCount = 0;
-    }
-
-    @Override
-    public synchronized void reportError(ERROR_CODE errorCode, String message) {
-      if (errorCode == ERROR_CODE.WRONG_USAGE) {
-        System.err.println(message);
-        return;
-      }
-
-      errorList.add(errorCode);
-      if (!summary) {
-        System.out.println("ERROR: " + message);
-      }
-      errorCount++;
-      showProgress = 0;
-    }
-
-    @Override
-    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
-      errorTables.add(table);
-      reportError(errorCode, message);
-    }
-
-    @Override
-    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
-        HbckInfo info) {
-      errorTables.add(table);
-      String reference = "(region " + info.getRegionNameAsString() + ")";
-      reportError(errorCode, reference + " " + message);
-    }
-
-    @Override
-    public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
-        HbckInfo info1, HbckInfo info2) {
-      errorTables.add(table);
-      String reference = "(regions " + info1.getRegionNameAsString()
-          + " and " + info2.getRegionNameAsString() + ")";
-      reportError(errorCode, reference + " " + message);
-    }
-
-    @Override
-    public synchronized void reportError(String message) {
-      reportError(ERROR_CODE.UNKNOWN, message);
-    }
-
-    /**
-     * Report error information, but do not increment the error count. Intended for cases
-     * where the actual error would have been reported previously.
-     * @param message
-     */
-    @Override
-    public synchronized void report(String message) {
-      if (! summary) {
-        System.out.println("ERROR: " + message);
-      }
-      showProgress = 0;
-    }
-
-    @Override
-    public synchronized int summarize() {
-      System.out.println(Integer.toString(errorCount) +
-          " inconsistencies detected.");
-      if (errorCount == 0) {
-        System.out.println("Status: OK");
-        return 0;
-      } else {
-        System.out.println("Status: INCONSISTENT");
-        return -1;
-      }
-    }
-
-    @Override
-    public ArrayList<ERROR_CODE> getErrorList() {
-      return errorList;
-    }
-
-    @Override
-    public synchronized void print(String message) {
-      if (!summary) {
-        System.out.println(message);
-      }
-    }
-
-    @Override
-    public boolean tableHasErrors(TableInfo table) {
-      return errorTables.contains(table);
-    }
-
-    @Override
-    public void resetErrors() {
-      errorCount = 0;
-    }
-
-    @Override
-    public synchronized void detail(String message) {
-      if (details) {
-        System.out.println(message);
-      }
-      showProgress = 0;
-    }
-
-    @Override
-    public synchronized void progress() {
-      if (showProgress++ == progressThreshold) {
-        if (!summary) {
-          System.out.print(".");
-        }
-        showProgress = 0;
-      }
+      reporter.details(sb.toString());
     }
   }
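Note on the hunk above: the removed ErrorReporter contract migrates into the new HBaseFsckUtil.java, which is not shown in this excerpt. Inferred purely from the renamed call sites in this patch (print/detail become details, reportError becomes error, tableHasErrors(TableInfo) becomes tableHasErrors(TableName)), the relocated interface plausibly reduces to something like the sketch below; the authoritative definition lives in HBaseFsckUtil.java, and a PrintingReporter implementation replaces PrintingErrorReporter (see the debugLsr hunks further down).

    // Sketch only, reconstructed from call sites in this patch; not the
    // verbatim HBaseFsckUtil.java source. ERROR_CODE moves along with it
    // (hence the new import of HBaseFsckUtil.Reporter.ERROR_CODE).
    public interface Reporter {
      enum ERROR_CODE { /* same codes as the removed ErrorReporter.ERROR_CODE */ }
      void error(ERROR_CODE errorCode, String message);  // was reportError(...)
      void details(String message);                      // was print(...) / detail(...)
      void progress();
      void resetErrors();
      boolean tableHasErrors(TableName tableName);       // was tableHasErrors(TableInfo)
    }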
@@ -4294,20 +3957,20 @@ public class HBaseFsck extends Configured implements Closeable {
   static class WorkItemRegion implements Callable<Void> {
     private final HBaseFsck hbck;
     private final ServerName rsinfo;
-    private final ErrorReporter errors;
+    private final HBaseFsckUtil.Reporter reporter;
     private final ClusterConnection connection;
 
     WorkItemRegion(HBaseFsck hbck, ServerName info,
-                   ErrorReporter errors, ClusterConnection connection) {
+        HBaseFsckUtil.Reporter reporter, ClusterConnection connection) {
       this.hbck = hbck;
       this.rsinfo = info;
-      this.errors = errors;
+      this.reporter = reporter;
       this.connection = connection;
     }
 
     @Override
     public synchronized Void call() throws IOException {
-      errors.progress();
+      reporter.progress();
       try {
         BlockingInterface server = connection.getAdmin(rsinfo);
@@ -4316,10 +3979,10 @@
         regions = filterRegions(regions);
 
         if (details) {
-          errors.detail("RegionServer: " + rsinfo.getServerName() +
+          reporter.details("RegionServer: " + rsinfo.getServerName() +
               " number of regions: " + regions.size());
           for (RegionInfo rinfo: regions) {
-            errors.detail("  " + rinfo.getRegionNameAsString() +
+            reporter.details("  " + rinfo.getRegionNameAsString() +
                 " id: " + rinfo.getRegionId() +
                 " encoded_name: " + rinfo.getEncodedName() +
                 " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
@@ -4333,7 +3996,7 @@
           hbi.addServer(r, rsinfo);
         }
       } catch (IOException e) {          // unable to connect to the region server.
-        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
+        reporter.error(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
           " Unable to fetch region information. " + e);
         throw e;
       }
@@ -4358,14 +4021,14 @@
    */
   class WorkItemHdfsDir implements Callable<Void> {
     private FileStatus tableDir;
-    private ErrorReporter errors;
+    private HBaseFsckUtil.Reporter reporter;
     private FileSystem fs;
 
-    WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
+    WorkItemHdfsDir(FileSystem fs, HBaseFsckUtil.Reporter reporter,
                     FileStatus status) {
       this.fs = fs;
       this.tableDir = status;
-      this.errors = errors;
+      this.reporter = reporter;
     }
 
     @Override
@@ -4377,7 +4040,7 @@
       final List<Future<?>> futures = new ArrayList<>(regionDirs.length);
 
       for (final FileStatus regionDir : regionDirs) {
-        errors.progress();
+        reporter.progress();
         final String encodedName = regionDir.getPath().getName();
         // ignore directories that aren't hexadecimal
         if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
@@ -4411,7 +4074,7 @@
               HdfsEntry he = new HdfsEntry();
               synchronized (hbi) {
                 if (hbi.getHdfsRegionDir() != null) {
-                  errors.print("Directory " + encodedName + " duplicate??" +
+                  reporter.details("Directory " + encodedName + " duplicate??" +
                       hbi.getHdfsRegionDir());
                 }
@@ -4426,7 +4089,7 @@
               FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
               Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
               for (FileStatus subDir : subDirs) {
-                errors.progress();
+                reporter.progress();
                 String sdName = subDir.getPath().getName();
                 if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
                   he.hdfsOnlyEdits = false;
@@ -4460,7 +4123,7 @@
           exceptions.add(e);
         } finally {
           if (!exceptions.isEmpty()) {
-            errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
+            reporter.error(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
                 + tableDir.getPath().getName()
                 + " Unable to fetch all HDFS region information. ");
             // Just throw the first exception as an indication something bad happened
@@ -4479,9 +4142,9 @@
   static class WorkItemHdfsRegionInfo implements Callable<Void> {
     private HbckInfo hbi;
     private HBaseFsck hbck;
-    private ErrorReporter errors;
+    private HBaseFsckUtil.Reporter errors;
 
-    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
+    WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, HBaseFsckUtil.Reporter errors) {
       this.hbi = hbi;
      this.hbck = hbck;
      this.errors = errors;
@@ -4500,7 +4163,7 @@
               + hbi.getHdfsRegionDir()
               + "! It may be an invalid format or version file. Treating as "
               + "an orphaned regiondir.";
-          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
+          errors.error(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
           try {
             hbck.debugLsr(hbi.getHdfsRegionDir());
           } catch (IOException ioe2) {
@@ -4869,7 +4532,7 @@
     out.println("   -cleanReplicationBrarier [tableName] clean the replication barriers " +
         "of a specified table, tableName is required");
     out.flush();
-    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
+    reporter.error(ERROR_CODE.WRONG_USAGE, sw.toString());
 
     setRetCode(-2);
     return this;
@@ -4923,36 +4586,35 @@
         setForceExclusive();
       } else if (cmd.equals("-timelag")) {
         if (i == args.length - 1) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
+          reporter.error(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
           return printUsageAndExit();
         }
         try {
           long timelag = Long.parseLong(args[++i]);
           setTimeLag(timelag);
         } catch (NumberFormatException e) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
+          reporter.error(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
           return printUsageAndExit();
         }
       } else if (cmd.equals("-sleepBeforeRerun")) {
         if (i == args.length - 1) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE,
-            "HBaseFsck: -sleepBeforeRerun needs a value.");
+          reporter.error(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sleepBeforeRerun needs a value.");
           return printUsageAndExit();
         }
         try {
           sleepBeforeRerun = Long.parseLong(args[++i]);
         } catch (NumberFormatException e) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
+          reporter.error(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
           return printUsageAndExit();
         }
       } else if (cmd.equals("-sidelineDir")) {
         if (i == args.length - 1) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
+          reporter.error(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
           return printUsageAndExit();
         }
         setSidelineDir(args[++i]);
       } else if (cmd.equals("-fix")) {
-        errors.reportError(ERROR_CODE.WRONG_USAGE,
+        reporter.error(ERROR_CODE.WRONG_USAGE,
           "This option is deprecated, please use -fixAssignments instead.");
         setFixAssignments(true);
       } else if (cmd.equals("-fixAssignments")) {
@@ -5015,7 +4677,7 @@
         setCheckHdfs(true);
       } else if (cmd.equals("-maxOverlapsToSideline")) {
         if (i == args.length - 1) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE,
+          reporter.error(ERROR_CODE.WRONG_USAGE,
             "-maxOverlapsToSideline needs a numeric value argument.");
           return printUsageAndExit();
         }
@@ -5023,13 +4685,13 @@
           int maxOverlapsToSideline = Integer.parseInt(args[++i]);
           setMaxOverlapsToSideline(maxOverlapsToSideline);
         } catch (NumberFormatException e) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE,
+          reporter.error(ERROR_CODE.WRONG_USAGE,
            "-maxOverlapsToSideline needs a numeric value argument.");
           return printUsageAndExit();
         }
       } else if (cmd.equals("-maxMerge")) {
         if (i == args.length - 1) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE,
+          reporter.error(ERROR_CODE.WRONG_USAGE,
             "-maxMerge needs a numeric value argument.");
           return printUsageAndExit();
         }
@@ -5037,7 +4699,7 @@
           int maxMerge = Integer.parseInt(args[++i]);
           setMaxMerge(maxMerge);
         } catch (NumberFormatException e) {
-          errors.reportError(ERROR_CODE.WRONG_USAGE,
+          reporter.error(ERROR_CODE.WRONG_USAGE,
             "-maxMerge needs a numeric value argument.");
           return printUsageAndExit();
         }
@@ -5056,15 +4718,15 @@
         }
         setCleanReplicationBarrierTable(args[i]);
       } else if (cmd.startsWith("-")) {
-        errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
+        reporter.error(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
         return printUsageAndExit();
       } else {
         includeTable(TableName.valueOf(cmd));
-        errors.print("Allow checking/fixes for table: " + cmd);
+        reporter.details("Allow checking/fixes for table: " + cmd);
       }
     }
 
-    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
+    reporter.details("HBaseFsck command line options: " + StringUtils.join(args, " "));
 
     // pre-check current user has FS write permission or not
     try {
@@ -5101,7 +4763,7 @@
         tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
       }
       hfcc.checkTables(tableDirs);
-      hfcc.report(errors);
+      hfcc.report(reporter);
     }
 
     // check and fix table integrity, region consistency.
@@ -5126,7 +4788,7 @@
         setFixHdfsOverlaps(false);
         setFixVersionFile(false);
         setFixTableOrphans(false);
-        errors.resetErrors();
+        reporter.resetErrors();
         code = onlineHbck();
         setRetCode(code);
       }
@@ -5143,7 +4805,7 @@
     // Process command-line args.
     for (String arg : args) {
       if (unsupportedOptionsInV2.contains(arg)) {
-        errors.reportError(ERROR_CODE.UNSUPPORTED_OPTION,
+        reporter.error(ERROR_CODE.UNSUPPORTED_OPTION,
          "option '" + arg + "' is not " + "supportted!");
         result = false;
         break;
@@ -5162,7 +4824,7 @@
       return;
     }
     if (cleanReplicationBarrierTable.isSystemTable()) {
-      errors.reportError(ERROR_CODE.INVALID_TABLE,
+      reporter.error(ERROR_CODE.INVALID_TABLE,
        "invalid table: " + cleanReplicationBarrierTable);
       return;
     }
@@ -5175,7 +4837,7 @@
     }
 
     if (isGlobalScope) {
-      errors.reportError(ERROR_CODE.INVALID_TABLE,
+      reporter.error(ERROR_CODE.INVALID_TABLE,
        "table's replication scope is global: " + cleanReplicationBarrierTable);
       return;
     }
@@ -5195,7 +4857,7 @@
       }
     }
     if (regionNames.size() <= 0) {
-      errors.reportError(ERROR_CODE.INVALID_TABLE,
+      reporter.error(ERROR_CODE.INVALID_TABLE,
        "there is no barriers of this table: " + cleanReplicationBarrierTable);
       return;
     }
@@ -5236,7 +4898,7 @@
    * ls -r for debugging purposes
    */
   void debugLsr(Path p) throws IOException {
-    debugLsr(getConf(), p, errors);
+    debugLsr(getConf(), p, reporter);
   }
 
   /**
@@ -5244,14 +4906,14 @@
    */
   public static void debugLsr(Configuration conf,
       Path p) throws IOException {
-    debugLsr(conf, p, new PrintingErrorReporter());
+    debugLsr(conf, p, new HBaseFsckUtil.PrintingReporter());
   }
 
   /**
    * ls -r for debugging purposes
    */
-  public static void debugLsr(Configuration conf,
-      Path p, ErrorReporter errors) throws IOException {
+  public static void debugLsr(Configuration conf, Path p, HBaseFsckUtil.Reporter reporter)
+      throws IOException {
     if (!LOG.isDebugEnabled() || p == null) {
       return;
     }
@@ -5261,7 +4923,7 @@
       // nothing
       return;
     }
-    errors.print(p.toString());
+    reporter.details(p.toString());
 
     if (fs.isFile(p)) {
       return;
@@ -5270,7 +4932,7 @@
     if (fs.getFileStatus(p).isDirectory()) {
       FileStatus[] fss= fs.listStatus(p);
       for (FileStatus status : fss) {
-        debugLsr(conf, status.getPath(), errors);
+        debugLsr(conf, status.getPath(), reporter);
       }
     }
   }
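Note on the debugLsr hunks above: the static helpers keep their recursive "ls -r" behaviour but now take the relocated reporter type, and they return silently unless debug logging is enabled for HBaseFsck. A minimal, hypothetical usage sketch (the configuration setup and table path below are illustrative examples, not taken from this patch):

    // Hypothetical call site; the path is a made-up example.
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck.debugLsr(conf, new Path("/hbase/data/default/t1"),
        new HBaseFsckUtil.PrintingReporter());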
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck2.java
new file mode 100644
index 0000000000000000000000000000000000000000..4e398cf472a12b878481a20b29589eb8df700e57
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck2.java
@@ -0,0 +1,421 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellBuilderFactory;
+import org.apache.hadoop.hbase.CellBuilderType;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos;
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
+import org.apache.hadoop.hbase.util.HBaseFsckUtil.Reporter.ERROR_CODE;
+import org.apache.log4j.Category;
+import org.apache.log4j.Level;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+@InterfaceStability.Evolving
+public class HBaseFsck2 extends AbstractHBaseTool implements Closeable {
+  private static final Logger LOG = LoggerFactory.getLogger(HBaseFsck2.class);
+
+  private static long startTime = System.currentTimeMillis();
+  private boolean reportOnly = true;
+
+  private long timelag = 0; // in milliseconds
+  private boolean exclusive = true;
+
+  private Set<TableName> tableNames = null;
+  private Set<String> regionNames = null;
+  private Set<ServerName> serverNames = null;
+
+  private boolean details = false;
+  private boolean verbose = false;
+
+  private org.apache.hbase.thirdparty.org.apache.commons.cli.Options actions =
+      new org.apache.hbase.thirdparty.org.apache.commons.cli.Options();
+
+  private Set<String> actionNames = new HashSet<>();
+
+  Collection<ServerName> regionServers = new ArrayList<ServerName>();
+  private Map<TableName, TableState> tableStates;
+  private Set<RegionState> rits;
+
+  HBaseFsckUtil.Reporter reporter;
+
+  private boolean isExclusive() {
+    return !reportOnly || exclusive;
+  }
+
+  private boolean isVersionSupported(final String hbaseServerVersion) {
+    return VersionInfo.compareVersion("2.any.any", hbaseServerVersion) < 0;
+  }
+
+  private String getActionsUsage() {
+    return new HelpFormatter() {
+      public String getActionsUsage() {
+        StringBuffer sb = new StringBuffer("Actions:" + this.getNewLine());
+        return renderOptions(sb, 120, actions, this.getLeftPadding(),
+            this.getDescPadding()).toString();
+      }
+
+      @Override
+      protected StringBuffer renderOptions(StringBuffer sb, int width,
+          org.apache.hbase.thirdparty.org.apache.commons.cli.Options options,
+          int leftPad, int descPad) {
+        String lpad = this.createPadding(leftPad);
+        String dpad = this.createPadding(descPad);
+        int max = 0;
+        List<StringBuffer> prefixList = new ArrayList<StringBuffer>();
+
+        List