diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index c066803..d961c28 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -1,19 +1,12 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable + * law or agreed to in writing, software distributed under the License is distributed on an "AS IS" + * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License + * for the specific language governing permissions and limitations under the License. */ package org.apache.hadoop.hbase.util; @@ -54,14 +47,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import com.google.common.collect.Multimap; -import com.google.common.collect.Ordering; -import com.google.common.collect.TreeMultimap; -import com.google.protobuf.ServiceException; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.lang.StringUtils; @@ -143,50 +128,52 @@ import org.apache.hadoop.util.ToolRunner; import org.apache.zookeeper.KeeperException; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Multimap; +import com.google.common.collect.Ordering; +import com.google.common.collect.TreeMultimap; +import com.google.protobuf.ServiceException; + /** - * HBaseFsck (hbck) is a tool for checking and repairing region consistency and - * table integrity problems in a corrupted HBase. + * HBaseFsck (hbck) is a tool for checking and repairing region consistency and table integrity + * problems in a corrupted HBase. *
<p>
- * Region consistency checks verify that hbase:meta, region deployment on region - * servers and the state of data in HDFS (.regioninfo files) all are in - * accordance. + * Region consistency checks verify that hbase:meta, region deployment on region servers and the + * state of data in HDFS (.regioninfo files) all are in accordance. *
<p>
- * Table integrity checks verify that all possible row keys resolve to exactly - * one region of a table. This means there are no individual degenerate - * or backwards regions; no holes between regions; and that there are no - * overlapping regions. + * Table integrity checks verify that all possible row keys resolve to exactly one region of a + * table. This means there are no individual degenerate or backwards regions; no holes between + * regions; and that there are no overlapping regions. *
<p>
 * The general repair strategy works in two phases:
 * <ol>
- * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
- * <li> Repair Region Consistency with hbase:meta and assignments
+ * <li>Repair Table Integrity on HDFS. (merge or fabricate regions)
+ * <li>Repair Region Consistency with hbase:meta and assignments
 * </ol>
 * <p>
- * For table integrity repairs, the tables' region directories are scanned - * for .regioninfo files. Each table's integrity is then verified. If there - * are any orphan regions (regions with no .regioninfo files) or holes, new - * regions are fabricated. Backwards regions are sidelined as well as empty - * degenerate (endkey==startkey) regions. If there are any overlapping regions, - * a new region is created and all data is merged into the new region. + * For table integrity repairs, the tables' region directories are scanned for .regioninfo files. + * Each table's integrity is then verified. If there are any orphan regions (regions with no + * .regioninfo files) or holes, new regions are fabricated. Backwards regions are sidelined as well + * as empty degenerate (endkey==startkey) regions. If there are any overlapping regions, a new + * region is created and all data is merged into the new region. *
<p>
- * Table integrity repairs deal solely with HDFS and could potentially be done - * offline -- the hbase region servers or master do not need to be running. - * This phase can eventually be used to completely reconstruct the hbase:meta table in - * an offline fashion. + * Table integrity repairs deal solely with HDFS and could potentially be done offline -- the hbase + * region servers or master do not need to be running. This phase can eventually be used to + * completely reconstruct the hbase:meta table in an offline fashion. *
<p>
- * Region consistency requires three conditions -- 1) valid .regioninfo file - * present in an HDFS region dir, 2) valid row with .regioninfo data in META, - * and 3) a region is deployed only at the regionserver that was assigned to - * with proper state in the master. + * Region consistency requires three conditions -- 1) valid .regioninfo file present in an HDFS + * region dir, 2) valid row with .regioninfo data in META, and 3) a region is deployed only at the + * regionserver that was assigned to with proper state in the master. *
<p>
- * Region consistency repairs require hbase to be online so that hbck can - * contact the HBase master and region servers. The hbck#connect() method must - * first be called successfully. Much of the region consistency information - * is transient and less risky to repair. + * Region consistency repairs require hbase to be online so that hbck can contact the HBase master + * and region servers. The hbck#connect() method must first be called successfully. Much of the + * region consistency information is transient and less risky to repair. *
<p>
- * If hbck is run from the command line, there are a handful of arguments that - * can be used to limit the kinds of repairs hbck will do. See the code in - * {@link #printUsageAndExit()} for more details. + * If hbck is run from the command line, there are a handful of arguments that can be used to limit + * the kinds of repairs hbck will do. See the code in {@link #printUsageAndExit()} for more details. */ @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) @InterfaceStability.Evolving @@ -272,23 +259,20 @@ int fixes = 0; /** - * This map contains the state of all hbck items. It maps from encoded region - * name to HbckInfo structure. The information contained in HbckInfo is used - * to detect and correct consistency (hdfs/meta/deployment) problems. + * This map contains the state of all hbck items. It maps from encoded region name to HbckInfo + * structure. The information contained in HbckInfo is used to detect and correct consistency + * (hdfs/meta/deployment) problems. */ private TreeMap regionInfoMap = new TreeMap(); // Empty regioninfo qualifiers in hbase:meta private Set emptyRegionInfoQualifiers = new HashSet(); /** - * This map from Tablename -> TableInfo contains the structures necessary to - * detect table consistency problems (holes, dupes, overlaps). It is sorted - * to prevent dupes. - * - * If tablesIncluded is empty, this map contains all tables. - * Otherwise, it contains only meta tables and tables in tablesIncluded, - * unless checkMetaOnly is specified, in which case, it contains only - * the meta table + * This map from Tablename -> TableInfo contains the structures necessary to detect table + * consistency problems (holes, dupes, overlaps). It is sorted to prevent dupes. If tablesIncluded + * is empty, this map contains all tables. 
Otherwise, it contains only meta tables and tables in + * tablesIncluded, unless checkMetaOnly is specified, in which case, it contains only the meta + * table */ private SortedMap tablesInfo = new ConcurrentSkipListMap(); @@ -298,17 +282,14 @@ */ private List orphanHdfsDirs = Collections.synchronizedList(new ArrayList()); - private Map> orphanTableDirs = - new HashMap>(); - private Map tableStates = - new HashMap(); + private Map> orphanTableDirs = new HashMap>(); + private Map tableStates = new HashMap(); private final RetryCounterFactory lockFileRetryCounterFactory; private Map> skippedRegions = new HashMap>(); /** * Constructor - * * @param conf Configuration object * @throws MasterNotRunningException if the master is not running * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper @@ -325,13 +306,9 @@ /** * Constructor - * - * @param conf - * Configuration object - * @throws MasterNotRunningException - * if the master is not running - * @throws ZooKeeperConnectionException - * if unable to connect to ZooKeeper + * @param conf Configuration object + * @throws MasterNotRunningException if the master is not running + * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper */ public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, ClassNotFoundException { @@ -339,11 +316,11 @@ errors = getErrorReporter(getConf()); this.executor = exec; lockFileRetryCounterFactory = new RetryCounterFactory( - getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), - getConf().getInt( - "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), - getConf().getInt( - "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); + getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), + getConf().getInt("hbase.hbck.lockfile.attempt.sleep.interval", + DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), + getConf().getInt("hbase.hbck.lockfile.attempt.maxsleeptime", + DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); } private class FileLockCallable implements Callable { @@ -352,12 +329,13 @@ public FileLockCallable(RetryCounter retryCounter) { this.retryCounter = retryCounter; } + @Override public FSDataOutputStream call() throws IOException { try { FileSystem fs = FSUtils.getCurrentFileSystem(getConf()); - FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(), - HConstants.DATA_FILE_UMASK_KEY); + FsPermission defaultPerms = + FSUtils.getFilePermissions(fs, getConf(), HConstants.DATA_FILE_UMASK_KEY); Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY); fs.mkdirs(tmpDir); HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE); @@ -365,8 +343,8 @@ out.writeBytes(InetAddress.getLocalHost().toString()); out.flush(); return out; - } catch(RemoteException e) { - if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){ + } catch (RemoteException e) { + if (AlreadyBeingCreatedException.class.getName().equals(e.getClassName())) { return null; } else { throw e; @@ -375,26 +353,22 @@ } private FSDataOutputStream createFileWithRetries(final FileSystem fs, - final Path hbckLockFilePath, final FsPermission defaultPerms) - throws IOException { + final Path hbckLockFilePath, final FsPermission defaultPerms) throws IOException { IOException exception = null; do { try { return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false); } catch (IOException ioe) 
{ - LOG.info("Failed to create lock file " + hbckLockFilePath.getName() - + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of " - + retryCounter.getMaxAttempts()); - LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), - ioe); + LOG.info("Failed to create lock file " + hbckLockFilePath.getName() + ", try=" + + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts()); + LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(), ioe); try { exception = ioe; retryCounter.sleepUntilNextRetry(); } catch (InterruptedException ie) { throw (InterruptedIOException) new InterruptedIOException( - "Can't create lock file " + hbckLockFilePath.getName()) - .initCause(ie); + "Can't create lock file " + hbckLockFilePath.getName()).initCause(ie); } } } while (retryCounter.shouldRetry()); @@ -405,7 +379,6 @@ /** * This method maintains a lock using a file. If the creation fails we return null - * * @return FSDataOutputStream object corresponding to the newly opened lock file * @throws IOException */ @@ -415,8 +388,8 @@ ExecutorService executor = Executors.newFixedThreadPool(1); FutureTask futureTask = new FutureTask(callable); executor.execute(futureTask); - final int timeoutInSeconds = getConf().getInt( - "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT); + final int timeoutInSeconds = + getConf().getInt("hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT); FSDataOutputStream stream = null; try { stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS); @@ -441,21 +414,18 @@ do { try { IOUtils.closeQuietly(hbckOutFd); - FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), - HBCK_LOCK_PATH, true); + FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true); LOG.info("Finishing hbck"); return; } catch (IOException ioe) { LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try=" - + (retryCounter.getAttemptTimes() + 1) + " of " - + retryCounter.getMaxAttempts()); + + (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts()); LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe); try { retryCounter.sleepUntilNextRetry(); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); - LOG.warn("Interrupted while deleting lock file" + - HBCK_LOCK_PATH); + LOG.warn("Interrupted while deleting lock file" + HBCK_LOCK_PATH); return; } } @@ -464,8 +434,7 @@ } /** - * To repair region consistency, one must call connect() in order to repair - * online state. + * To repair region consistency, one must call connect() in order to repair online state. */ public void connect() throws IOException { @@ -474,16 +443,15 @@ hbckOutFd = checkAndMarkRunningHbck(); if (hbckOutFd == null) { setRetCode(-1); - LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " + - "[If you are sure no other instance is running, delete the lock file " + - HBCK_LOCK_PATH + " and rerun the tool]"); + LOG.error("Another instance of hbck is fixing HBase, exiting this instance. 
" + + "[If you are sure no other instance is running, delete the lock file " + + HBCK_LOCK_PATH + " and rerun the tool]"); throw new IOException("Duplicate hbck - Abort"); } // Make sure to cleanup the lock hbckLockCleanup.set(true); } - // Add a shutdown hook to this thread, in case user tries to // kill the hbck with a ctrl-c, we want to cleanup the lock so that @@ -498,7 +466,7 @@ LOG.info("Launching hbck"); - connection = (ClusterConnection)ConnectionFactory.createConnection(getConf()); + connection = (ClusterConnection) ConnectionFactory.createConnection(getConf()); admin = connection.getAdmin(); meta = connection.getTable(TableName.META_TABLE_NAME); status = admin.getClusterStatus(); @@ -512,7 +480,7 @@ Collection regionServers = status.getServers(); errors.print("Number of live region servers: " + regionServers.size()); if (details) { - for (ServerName rsinfo: regionServers) { + for (ServerName rsinfo : regionServers) { errors.print(" " + rsinfo.getServerName()); } } @@ -521,7 +489,7 @@ Collection deadRegionServers = status.getDeadServerNames(); errors.print("Number of dead region servers: " + deadRegionServers.size()); if (details) { - for (ServerName name: deadRegionServers) { + for (ServerName name : deadRegionServers) { errors.print(" " + name); } } @@ -533,7 +501,7 @@ Collection backupMasters = status.getBackupMasters(); errors.print("Number of backup masters: " + backupMasters.size()); if (details) { - for (ServerName name: backupMasters) { + for (ServerName name : backupMasters) { errors.print(" " + name); } } @@ -545,7 +513,7 @@ Map rits = status.getRegionsInTransition(); errors.print("Number of regions in transition: " + rits.size()); if (details) { - for (RegionState state: rits.values()) { + for (RegionState state : rits.values()) { errors.print(" " + state.toDescriptiveString()); } } @@ -570,9 +538,8 @@ } /** - * This repair method analyzes hbase data in hdfs and repairs it to satisfy - * the table integrity rules. HBase doesn't need to be online for this - * operation to work. + * This repair method analyzes hbase data in hdfs and repairs it to satisfy the table integrity + * rules. HBase doesn't need to be online for this operation to work. */ public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException { // Initial pass to fix orphans. @@ -603,15 +570,12 @@ } /** - * This repair method requires the cluster to be online since it contacts - * region servers and the masters. It makes each region's state in HDFS, in - * hbase:meta, and deployments consistent. - * - * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable - * error. If 0, we have a clean hbase. + * This repair method requires the cluster to be online since it contacts region servers and the + * masters. It makes each region's state in HDFS, in hbase:meta, and deployments consistent. + * @return If > 0 , number of errors detected, if < 0 there was an unrecoverable error. If 0, + * we have a clean hbase. 
*/ - public int onlineConsistencyRepair() throws IOException, KeeperException, - InterruptedException { + public int onlineConsistencyRepair() throws IOException, KeeperException, InterruptedException { clearState(); // get regions according to what is online on each RegionServer @@ -674,7 +638,8 @@ * Contacts the master and prints out cluster-wide information * @return 0 on success, non-zero on failure */ - public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException { + public int onlineHbck() + throws IOException, KeeperException, InterruptedException, ServiceException { // print hbase server version errors.print("Version: " + status.getHBaseVersion()); offlineHdfsIntegrityRepair(); @@ -686,8 +651,7 @@ try { onlineConsistencyRepair(); - } - finally { + } finally { // Only restore the balancer if it was true when we started repairing and // we actually disabled it. Otherwise, we might clobber another run of // hbck that has just restored it. @@ -705,7 +669,7 @@ checkAndFixTableLocks(); checkAndFixReplication(); - + // Remove the hbck lock unlockHbck(); @@ -714,9 +678,8 @@ return errors.summarize(); } - public static byte[] keyOnly (byte[] b) { - if (b == null) - return b; + public static byte[] keyOnly(byte[] b) { + if (b == null) return b; int rowlength = Bytes.toShort(b, 0); byte[] result = new byte[rowlength]; System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength); @@ -737,18 +700,19 @@ } private static class RegionBoundariesInformation { - public byte [] regionName; - public byte [] metaFirstKey; - public byte [] metaLastKey; - public byte [] storesFirstKey; - public byte [] storesLastKey; + public byte[] regionName; + public byte[] metaFirstKey; + public byte[] metaLastKey; + public byte[] storesFirstKey; + public byte[] storesLastKey; + @Override - public String toString () { - return "regionName=" + Bytes.toStringBinary(regionName) + - "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) + - "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) + - "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) + - "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey); + public String toString() { + return "regionName=" + Bytes.toStringBinary(regionName) + "\nmetaFirstKey=" + + Bytes.toStringBinary(metaFirstKey) + "\nmetaLastKey=" + + Bytes.toStringBinary(metaLastKey) + "\nstoresFirstKey=" + + Bytes.toStringBinary(storesFirstKey) + "\nstoresLastKey=" + + Bytes.toStringBinary(storesLastKey); } } @@ -777,17 +741,17 @@ FileStatus[] storeFiles = fs.listStatus(file.getPath()); // For all the stores in this column family. 
for (FileStatus storeFile : storeFiles) { - HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig( - getConf()), getConf()); + HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), + new CacheConfig(getConf()), getConf()); if ((reader.getFirstKey() != null) && ((storeFirstKey == null) || (comparator.compare(storeFirstKey, - ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) { - storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey()).getKey(); + ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) { + storeFirstKey = ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey(); } if ((reader.getLastKey() != null) && ((storeLastKey == null) || (comparator.compare(storeLastKey, - ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey())) < 0)) { - storeLastKey = ((KeyValue.KeyOnlyKeyValue)reader.getLastKey()).getKey(); + ((KeyValue.KeyOnlyKeyValue) reader.getLastKey()).getKey())) < 0)) { + storeLastKey = ((KeyValue.KeyOnlyKeyValue) reader.getLastKey()).getKey(); } reader.close(); } @@ -795,7 +759,7 @@ } currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey(); currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey(); - currentRegionBoundariesInformation.storesFirstKey = storeFirstKey; + currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey); currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey); if (currentRegionBoundariesInformation.metaFirstKey.length == 0) currentRegionBoundariesInformation.metaFirstKey = null; @@ -811,16 +775,14 @@ // Checking start key. if ((currentRegionBoundariesInformation.storesFirstKey != null) && (currentRegionBoundariesInformation.metaFirstKey != null)) { - valid = valid - && comparator.compare(currentRegionBoundariesInformation.storesFirstKey, - currentRegionBoundariesInformation.metaFirstKey) >= 0; + valid = valid && comparator.compare(currentRegionBoundariesInformation.storesFirstKey, + currentRegionBoundariesInformation.metaFirstKey) >= 0; } // Checking stop key. if ((currentRegionBoundariesInformation.storesLastKey != null) && (currentRegionBoundariesInformation.metaLastKey != null)) { - valid = valid - && comparator.compare(currentRegionBoundariesInformation.storesLastKey, - currentRegionBoundariesInformation.metaLastKey) < 0; + valid = valid && comparator.compare(currentRegionBoundariesInformation.storesLastKey, + currentRegionBoundariesInformation.metaLastKey) < 0; } if (!valid) { errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries", @@ -845,13 +807,11 @@ } /** - * Orphaned regions are regions without a .regioninfo file in them. We "adopt" - * these orphans by creating a new region, and moving the column families, - * recovered edits, WALs, into the new region dir. We determine the region - * startkey and endkeys by looking at all of the hfiles inside the column - * families to identify the min and max keys. The resulting region will - * likely violate table integrity but will be dealt with by merging - * overlapping regions. + * Orphaned regions are regions without a .regioninfo file in them. We "adopt" these orphans by + * creating a new region, and moving the column families, recovered edits, WALs, into the new + * region dir. We determine the region startkey and endkeys by looking at all of the hfiles inside + * the column families to identify the min and max keys. 
The resulting region will likely violate + * table integrity but will be dealt with by merging overlapping regions. */ @SuppressWarnings("deprecation") private void adoptHdfsOrphan(HbckInfo hi) throws IOException { @@ -859,9 +819,9 @@ FileSystem fs = p.getFileSystem(getConf()); FileStatus[] dirs = fs.listStatus(p); if (dirs == null) { - LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " + - p + ". This dir could probably be deleted."); - return ; + LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " + p + + ". This dir could probably be deleted."); + return; } TableName tableName = hi.getTableName(); @@ -870,9 +830,9 @@ HTableDescriptor template = tableInfo.getHTD(); // find min and max key values - Pair orphanRegionRange = null; + Pair orphanRegionRange = null; for (FileStatus cf : dirs) { - String cfName= cf.getPath().getName(); + String cfName = cf.getPath().getName(); // TODO Figure out what the special dirs are if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue; @@ -911,7 +871,7 @@ if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) { orphanRegionRange.setFirst(start); } - if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) { + if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) { orphanRegionRange.setSecond(end); } } @@ -923,11 +883,12 @@ sidelineRegionDir(fs, hi); return; } - LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " + - Bytes.toString(orphanRegionRange.getSecond()) + ")"); + LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " + + Bytes.toString(orphanRegionRange.getSecond()) + ")"); - // create new region on hdfs. move data into place. - HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), orphanRegionRange.getSecond()); + // create new region on hdfs. move data into place. + HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), + orphanRegionRange.getSecond()); LOG.info("Creating new region : " + hri); HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template); Path target = region.getRegionFileSystem().getRegionDir(); @@ -938,11 +899,9 @@ } /** - * This method determines if there are table integrity errors in HDFS. If - * there are errors and the appropriate "fix" options are enabled, the method - * will first correct orphan regions making them into legit regiondirs, and - * then reload to merge potentially overlapping regions. - * + * This method determines if there are table integrity errors in HDFS. If there are errors and the + * appropriate "fix" options are enabled, the method will first correct orphan regions making them + * into legit regiondirs, and then reload to merge potentially overlapping regions. * @return number of table integrity errors found */ private int restoreHdfsIntegrity() throws IOException, InterruptedException { @@ -986,12 +945,12 @@ } /** - * Scan all the store file names to find any lingering reference files, - * which refer to some none-exiting files. If "fix" option is enabled, - * any lingering reference file will be sidelined if found. + * Scan all the store file names to find any lingering reference files, which refer to some + * none-exiting files. If "fix" option is enabled, any lingering reference file will be sidelined + * if found. *
<p>
- * Lingering reference file prevents a region from opening. It has to - * be fixed before a cluster can start properly. + * Lingering reference file prevents a region from opening. It has to be fixed before a cluster + * can start properly. */ private void offlineReferenceFileRepair() throws IOException { Configuration conf = getConf(); @@ -1001,7 +960,7 @@ Map allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, errors); errors.print(""); LOG.info("Validating mapping using HDFS state"); - for (Path path: allFiles.values()) { + for (Path path : allFiles.values()) { boolean isReference = false; try { isReference = StoreFileInfo.isReference(path); @@ -1014,7 +973,7 @@ if (!isReference) continue; Path referredToFile = StoreFileInfo.getReferredToFile(path); - if (fs.exists(referredToFile)) continue; // good, expected + if (fs.exists(referredToFile)) continue; // good, expected // Found a lingering reference file errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE, @@ -1037,8 +996,7 @@ Path rootDir = getSidelineDir(); Path dst = new Path(rootDir, pathStr.substring(index + 1)); fs.mkdirs(dst.getParent()); - LOG.info("Trying to sildeline reference file " - + path + " to " + dst); + LOG.info("Trying to sildeline reference file " + path + " to " + dst); setShouldRerun(); success = fs.rename(path, dst); @@ -1053,10 +1011,10 @@ * TODO -- need to add tests for this. */ private void reportEmptyMetaCells() { - errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " + - emptyRegionInfoQualifiers.size()); + errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " + + emptyRegionInfoQualifiers.size()); if (details) { - for (Result r: emptyRegionInfoQualifiers) { + for (Result r : emptyRegionInfoQualifiers) { errors.print(" " + r); } } @@ -1074,10 +1032,9 @@ errors.detail("Number of Tables in flux: " + numSkipped.get()); } for (HTableDescriptor td : allTables) { - errors.detail(" Table: " + td.getTableName() + "\t" + - (td.isReadOnly() ? "ro" : "rw") + "\t" + - (td.isMetaRegion() ? "META" : " ") + "\t" + - " families: " + td.getFamilies().size()); + errors.detail(" Table: " + td.getTableName() + "\t" + (td.isReadOnly() ? "ro" : "rw") + + "\t" + (td.isMetaRegion() ? "META" : " ") + "\t" + " families: " + + td.getFamilies().size()); } } } @@ -1087,8 +1044,8 @@ } /** - * Read the .regioninfo file from the file system. If there is no - * .regioninfo, add it to the orphan hdfs region list. + * Read the .regioninfo file from the file system. If there is no .regioninfo, add it to the + * orphan hdfs region list. */ private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException { Path regionDir = hbi.getHdfsRegionDir(); @@ -1109,12 +1066,12 @@ } /** - * Exception thrown when a integrity repair operation fails in an - * unresolvable way. + * Exception thrown when a integrity repair operation fails in an unresolvable way. 
*/ public static class RegionRepairException extends IOException { private static final long serialVersionUID = 1L; final IOException ioe; + public RegionRepairException(String s, IOException ioe) { super(s); this.ioe = ioe; @@ -1142,27 +1099,26 @@ // Submit and wait for completion hbiFutures = executor.invokeAll(hbis); - for(int i=0; i f = hbiFutures.get(i); try { f.get(); - } catch(ExecutionException e) { - LOG.warn("Failed to read .regioninfo file for region " + - work.hbi.getRegionNameAsString(), e.getCause()); + } catch (ExecutionException e) { + LOG.warn("Failed to read .regioninfo file for region " + work.hbi.getRegionNameAsString(), + e.getCause()); } } Path hbaseRoot = FSUtils.getRootDir(getConf()); FileSystem fs = hbaseRoot.getFileSystem(getConf()); // serialized table info gathering. - for (HbckInfo hbi: hbckInfos) { + for (HbckInfo hbi : hbckInfos) { if (hbi.getHdfsHRI() == null) { // was an orphan continue; } - // get table name from hdfs, populate various HBaseFsck tables. TableName tableName = hbi.getTableName(); @@ -1184,9 +1140,9 @@ } catch (IOException ioe) { if (!orphanTableDirs.containsKey(tableName)) { LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe); - //should only report once for each table + // should only report once for each table errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, - "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName); + "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName); Set columns = new HashSet(); orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi)); } @@ -1230,7 +1186,7 @@ */ private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName, Set columns) throws IOException { - if (columns ==null || columns.isEmpty()) return false; + if (columns == null || columns.isEmpty()) return false; HTableDescriptor htd = new HTableDescriptor(tableName); for (String columnfamimly : columns) { htd.addFamily(new HColumnDescriptor(columnfamimly)); @@ -1260,7 +1216,8 @@ * 2. else create a default .tableinfo file with following items
 * &nbsp;2.1 the correct tablename<br>
 * &nbsp;2.2 the correct colfamily list<br>
- * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
+ * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and
+ * {@link HColumnDescriptor}<br>
* @throws IOException */ public void fixOrphanTables() throws IOException { @@ -1269,14 +1226,12 @@ List tmpList = new ArrayList(); tmpList.addAll(orphanTableDirs.keySet()); HTableDescriptor[] htds = getHTableDescriptors(tmpList); - Iterator>> iter = - orphanTableDirs.entrySet().iterator(); + Iterator>> iter = orphanTableDirs.entrySet().iterator(); int j = 0; int numFailedCase = 0; FSTableDescriptors fstd = new FSTableDescriptors(getConf()); while (iter.hasNext()) { - Entry> entry = - iter.next(); + Entry> entry = iter.next(); TableName tableName = entry.getKey(); LOG.info("Trying to fix orphan table error: " + tableName); if (j < htds.length) { @@ -1290,10 +1245,12 @@ } else { if (fabricateTableInfo(fstd, tableName, entry.getValue())) { LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file"); - LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName); + LOG.warn( + "Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName); iter.remove(); } else { - LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information"); + LOG.error("Unable to create default .tableinfo for " + tableName + + " while missing column family information"); numFailedCase++; } } @@ -1304,14 +1261,14 @@ // all orphanTableDirs are luckily recovered // re-run doFsck after recovering the .tableinfo file setShouldRerun(); - LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed"); + LOG.warn( + "Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed"); } else if (numFailedCase > 0) { - LOG.error("Failed to fix " + numFailedCase - + " OrphanTables with default .tableinfo files"); + LOG.error("Failed to fix " + numFailedCase + " OrphanTables with default .tableinfo files"); } } - //cleanup the list + // cleanup the list orphanTableDirs.clear(); } @@ -1319,7 +1276,6 @@ /** * This borrows code from MasterFileSystem.bootstrap(). Explicitly creates it's own WAL, so be * sure to close it as well as the region when you're finished. - * * @return an open hbase:meta HRegion */ private HRegion createNewMeta() throws IOException { @@ -1333,22 +1289,21 @@ Configuration confForWAL = new Configuration(c); confForWAL.set(HConstants.HBASE_DIR, rootdir.toString()); WAL wal = (new WALFactory(confForWAL, - Collections.singletonList(new MetricsWAL()), - "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))). - getWAL(metaHRI.getEncodedNameAsBytes(), metaHRI.getTable().getNamespace()); + Collections. singletonList(new MetricsWAL()), + "hbck-meta-recovery-" + RandomStringUtils.randomNumeric(8))) + .getWAL(metaHRI.getEncodedNameAsBytes(), metaHRI.getTable().getNamespace()); HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor, wal); MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true); return meta; } /** - * Generate set of puts to add to new meta. This expects the tables to be - * clean with no overlaps or holes. If there are any problems it returns null. - * + * Generate set of puts to add to new meta. This expects the tables to be clean with no overlaps + * or holes. If there are any problems it returns null. 
* @return An array list of puts to do in bulk, null if tables have problems */ - private ArrayList generatePuts( - SortedMap tablesInfo) throws IOException { + private ArrayList generatePuts(SortedMap tablesInfo) + throws IOException { ArrayList puts = new ArrayList(); boolean hasProblems = false; for (Entry e : tablesInfo.entrySet()) { @@ -1362,14 +1317,13 @@ TableInfo ti = e.getValue(); puts.add(MetaTableAccessor .makePutFromTableState(new TableState(ti.tableName, TableState.State.ENABLED))); - for (Entry> spl : ti.sc.getStarts().asMap() - .entrySet()) { + for (Entry> spl : ti.sc.getStarts().asMap().entrySet()) { Collection his = spl.getValue(); int sz = his.size(); if (sz != 1) { // problem - LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey()) - + " had " + sz + " regions instead of exactly 1." ); + LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey()) + " had " + sz + + " regions instead of exactly 1."); hasProblems = true; continue; } @@ -1387,8 +1341,7 @@ /** * Suggest fixes for each table */ - private void suggestFixes( - SortedMap tablesInfo) throws IOException { + private void suggestFixes(SortedMap tablesInfo) throws IOException { logParallelMerge(); for (TableInfo tInfo : tablesInfo.values()) { TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors); @@ -1397,14 +1350,12 @@ } /** - * Rebuilds meta from information in hdfs/fs. Depends on configuration settings passed into - * hbck constructor to point to a particular fs/dir. Assumes HBase is OFFLINE. - * + * Rebuilds meta from information in hdfs/fs. Depends on configuration settings passed into hbck + * constructor to point to a particular fs/dir. Assumes HBase is OFFLINE. * @param fix flag that determines if method should attempt to fix holes * @return true if successful, false if attempt failed. */ - public boolean rebuildMeta(boolean fix) throws IOException, - InterruptedException { + public boolean rebuildMeta(boolean fix) throws IOException, InterruptedException { // TODO check to make sure hbase is offline. (or at least the table // currently being worked on is off line) @@ -1420,7 +1371,7 @@ // make sure ok. if (errors.getErrorList().size() != errs) { // While in error state, iterate until no more fixes possible - while(true) { + while (true) { fixes = 0; suggestFixes(tablesInfo); errors.clear(); @@ -1449,8 +1400,8 @@ // populate meta List puts = generatePuts(tablesInfo); if (puts == null) { - LOG.fatal("Problem encountered when creating new hbase:meta entries. " + - "You may need to restore the previously sidelined hbase:meta"); + LOG.fatal("Problem encountered when creating new hbase:meta entries. " + + "You may need to restore the previously sidelined hbase:meta"); return false; } meta.batchMutate(puts.toArray(new Put[puts.size()])); @@ -1468,23 +1419,22 @@ */ private void logParallelMerge() { if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) { - LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" + - " false to run serially."); + LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" + + " false to run serially."); } else { - LOG.info("Handling overlap merges serially. set hbasefsck.overlap.merge.parallel to" + - " true to run in parallel."); + LOG.info("Handling overlap merges serially. 
set hbasefsck.overlap.merge.parallel to" + + " true to run in parallel."); } } - private SortedMap checkHdfsIntegrity(boolean fixHoles, - boolean fixOverlaps) throws IOException { + private SortedMap checkHdfsIntegrity(boolean fixHoles, boolean fixOverlaps) + throws IOException { LOG.info("Checking HBase region split map from HDFS data..."); logParallelMerge(); for (TableInfo tInfo : tablesInfo.values()) { TableIntegrityErrorHandler handler; if (fixHoles || fixOverlaps) { - handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), - fixHoles, fixOverlaps); + handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(), fixHoles, fixOverlaps); } else { handler = tInfo.new IntegrityFixSuggester(tInfo, errors); } @@ -1500,8 +1450,7 @@ if (sidelineDir == null) { Path hbaseDir = FSUtils.getRootDir(getConf()); Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME); - sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" - + startMillis); + sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-" + startMillis); } return sidelineDir; } @@ -1515,14 +1464,12 @@ /** * Sideline a region dir (instead of deleting it) - * * @param parentDir if specified, the region will be sidelined to folder like - * {@literal .../parentDir//}. The purpose is to group together - * similar regions sidelined, for example, those regions should be bulk loaded back later - * on. If NULL, it is ignored. + * {@literal .../parentDir/
/}. The purpose is to group together + * similar regions sidelined, for example, those regions should be bulk loaded back later + * on. If NULL, it is ignored. */ - Path sidelineRegionDir(FileSystem fs, - String parentDir, HbckInfo hi) throws IOException { + Path sidelineRegionDir(FileSystem fs, String parentDir, HbckInfo hi) throws IOException { TableName tableName = hi.getTableName(); Path regionDir = hi.getHdfsRegionDir(); @@ -1535,22 +1482,22 @@ if (parentDir != null) { rootDir = new Path(rootDir, parentDir); } - Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName); + Path sidelineTableDir = FSUtils.getTableDir(rootDir, tableName); Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName()); fs.mkdirs(sidelineRegionDir); boolean success = false; - FileStatus[] cfs = fs.listStatus(regionDir); + FileStatus[] cfs = fs.listStatus(regionDir); if (cfs == null) { LOG.info("Region dir is empty: " + regionDir); } else { for (FileStatus cf : cfs) { Path src = cf.getPath(); - Path dst = new Path(sidelineRegionDir, src.getName()); + Path dst = new Path(sidelineRegionDir, src.getName()); if (fs.isFile(src)) { // simple file success = fs.rename(src, dst); if (!success) { - String msg = "Unable to rename file " + src + " to " + dst; + String msg = "Unable to rename file " + src + " to " + dst; LOG.error(msg); throw new IOException(msg); } @@ -1563,14 +1510,14 @@ LOG.info("Sidelining files from " + src + " into containing region " + dst); // FileSystem.rename is inconsistent with directories -- if the // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir, - // it moves the src into the dst dir resulting in (foo/a/b). If + // it moves the src into the dst dir resulting in (foo/a/b). If // the dst does not exist, and the src a dir, src becomes dst. (foo/b) FileStatus[] hfiles = fs.listStatus(src); if (hfiles != null && hfiles.length > 0) { for (FileStatus hfile : hfiles) { success = fs.rename(hfile.getPath(), dst); if (!success) { - String msg = "Unable to rename file " + src + " to " + dst; + String msg = "Unable to rename file " + src + " to " + dst; LOG.error(msg); throw new IOException(msg); } @@ -1594,19 +1541,19 @@ /** * Side line an entire table. */ - void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir, - Path backupHbaseDir) throws IOException { + void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir, Path backupHbaseDir) + throws IOException { Path tableDir = FSUtils.getTableDir(hbaseDir, tableName); if (fs.exists(tableDir)) { - Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName); + Path backupTableDir = FSUtils.getTableDir(backupHbaseDir, tableName); fs.mkdirs(backupTableDir.getParent()); boolean success = fs.rename(tableDir, backupTableDir); if (!success) { - throw new IOException("Failed to move " + tableName + " from " - + tableDir + " to " + backupTableDir); + throw new IOException( + "Failed to move " + tableName + " from " + tableDir + " to " + backupTableDir); } } else { - LOG.info("No previous " + tableName + " exists. Continuing."); + LOG.info("No previous " + tableName + " exists. Continuing."); } } @@ -1623,9 +1570,10 @@ try { sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir); } catch (IOException e) { - LOG.fatal("... failed to sideline meta. Currently in inconsistent state. To restore " - + "try to rename hbase:meta in " + backupDir.getName() + " to " - + hbaseDir.getName() + ".", e); + LOG.fatal("... failed to sideline meta. Currently in inconsistent state. 
To restore " + + "try to rename hbase:meta in " + backupDir.getName() + " to " + hbaseDir.getName() + + ".", + e); throw e; // throw original exception } return backupDir; @@ -1636,8 +1584,7 @@ * @throws ZooKeeperConnectionException * @throws IOException */ - private void loadTableStates() - throws IOException { + private void loadTableStates() throws IOException { tableStates = MetaTableAccessor.getTableStates(connection); } @@ -1646,14 +1593,12 @@ * @param tableName table to check status of */ private boolean isTableDisabled(TableName tableName) { - return tableStates.containsKey(tableName) - && tableStates.get(tableName) + return tableStates.containsKey(tableName) && tableStates.get(tableName) .inStates(TableState.State.DISABLED, TableState.State.DISABLING); } /** - * Scan HDFS for all regions, recording their information into - * regionInfoMap + * Scan HDFS for all regions, recording their information into regionInfoMap */ public void loadHdfsRegionDirs() throws IOException, InterruptedException { Path rootDir = FSUtils.getRootDir(getConf()); @@ -1667,45 +1612,43 @@ List paths = FSUtils.getTableDirs(fs, rootDir); for (Path path : paths) { TableName tableName = FSUtils.getTableName(path); - if ((!checkMetaOnly && - isTableIncluded(tableName)) || - tableName.equals(TableName.META_TABLE_NAME)) { - tableDirs.add(fs.getFileStatus(path)); - } + if ((!checkMetaOnly && isTableIncluded(tableName)) + || tableName.equals(TableName.META_TABLE_NAME)) { + tableDirs.add(fs.getFileStatus(path)); + } } // verify that version file exists if (!foundVersionFile) { errors.reportError(ERROR_CODE.NO_VERSION_FILE, - "Version file does not exist in root dir " + rootDir); + "Version file does not exist in root dir " + rootDir); if (shouldFixVersionFile()) { - LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME - + " file."); + LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME + " file."); setShouldRerun(); - FSUtils.setVersion(fs, rootDir, getConf().getInt( - HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt( - HConstants.VERSION_FILE_WRITE_ATTEMPTS, + FSUtils.setVersion(fs, rootDir, + getConf().getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), + getConf().getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS)); } } - // level 1: /* + // level 1: /* List dirs = new ArrayList(tableDirs.size()); List> dirsFutures; for (FileStatus tableDir : tableDirs) { - LOG.debug("Loading region dirs from " +tableDir.getPath()); + LOG.debug("Loading region dirs from " + tableDir.getPath()); dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir)); } // Invoke and wait for Callables to complete dirsFutures = executor.invokeAll(dirs); - for(Future f: dirsFutures) { + for (Future f : dirsFutures) { try { f.get(); - } catch(ExecutionException e) { - LOG.warn("Could not load region dir " , e.getCause()); + } catch (ExecutionException e) { + LOG.warn("Could not load region dir ", e.getCause()); } } errors.print(""); @@ -1715,32 +1658,29 @@ * Record the location of the hbase:meta region as found in ZooKeeper. 
*/ private boolean recordMetaRegion() throws IOException { - RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME, - HConstants.EMPTY_START_ROW, false, false); + RegionLocations rl = ((ClusterConnection) connection).locateRegion(TableName.META_TABLE_NAME, + HConstants.EMPTY_START_ROW, false, false); if (rl == null) { - errors.reportError(ERROR_CODE.NULL_META_REGION, - "META region was not found in Zookeeper"); + errors.reportError(ERROR_CODE.NULL_META_REGION, "META region was not found in Zookeeper"); return false; } for (HRegionLocation metaLocation : rl.getRegionLocations()) { // Check if Meta region is valid and existing - if (metaLocation == null ) { - errors.reportError(ERROR_CODE.NULL_META_REGION, - "META region location is null"); + if (metaLocation == null) { + errors.reportError(ERROR_CODE.NULL_META_REGION, "META region location is null"); return false; } if (metaLocation.getRegionInfo() == null) { - errors.reportError(ERROR_CODE.NULL_META_REGION, - "META location regionInfo is null"); + errors.reportError(ERROR_CODE.NULL_META_REGION, "META location regionInfo is null"); return false; } if (metaLocation.getHostname() == null) { - errors.reportError(ERROR_CODE.NULL_META_REGION, - "META location hostName is null"); + errors.reportError(ERROR_CODE.NULL_META_REGION, "META location hostName is null"); return false; } ServerName sn = metaLocation.getServerName(); - MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime()); + MetaEntry m = + new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime()); HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName()); if (hbckInfo == null) { regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m)); @@ -1767,8 +1707,7 @@ }); } - private ServerName getMetaRegionServerName(int replicaId) - throws IOException, KeeperException { + private ServerName getMetaRegionServerName(int replicaId) throws IOException, KeeperException { ZooKeeperWatcher zkw = createZooKeeperWatcher(); ServerName sn = null; try { @@ -1785,26 +1724,25 @@ * @throws IOException if a remote or network exception occurs */ void processRegionServers(Collection regionServerList) - throws IOException, InterruptedException { + throws IOException, InterruptedException { List workItems = new ArrayList(regionServerList.size()); List> workFutures; // loop to contact each region server in parallel - for (ServerName rsinfo: regionServerList) { + for (ServerName rsinfo : regionServerList) { workItems.add(new WorkItemRegion(this, rsinfo, errors, connection)); } workFutures = executor.invokeAll(workItems); - for(int i=0; i f = workFutures.get(i); try { f.get(); - } catch(ExecutionException e) { - LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(), - e.getCause()); + } catch (ExecutionException e) { + LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(), e.getCause()); } } } @@ -1812,14 +1750,13 @@ /** * Check consistency of all regions that have been found in previous phases. */ - private void checkAndFixConsistency() - throws IOException, KeeperException, InterruptedException { + private void checkAndFixConsistency() throws IOException, KeeperException, InterruptedException { // Divide the checks in two phases. One for default/primary replicas and another // for the non-primary ones. Keeps code cleaner this way. 
List workItems = new ArrayList(regionInfoMap.size()); - for (java.util.Map.Entry e: regionInfoMap.entrySet()) { + for (java.util.Map.Entry e : regionInfoMap.entrySet()) { if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue())); } @@ -1827,12 +1764,12 @@ checkRegionConsistencyConcurrently(workItems); boolean prevHdfsCheck = shouldCheckHdfs(); - setCheckHdfs(false); //replicas don't have any hdfs data + setCheckHdfs(false); // replicas don't have any hdfs data // Run a pass over the replicas and fix any assignment issues that exist on the currently // deployed/undeployed replicas. List replicaWorkItems = new ArrayList(regionInfoMap.size()); - for (java.util.Map.Entry e: regionInfoMap.entrySet()) { + for (java.util.Map.Entry e : regionInfoMap.entrySet()) { if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue())); } @@ -1844,11 +1781,11 @@ // not get accurate state of the hbase if continuing. The config here allows users to tune // the tolerance of number of skipped region. // TODO: evaluate the consequence to continue the hbck operation without config. - int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0); + int terminateThreshold = getConf().getInt("hbase.hbck.skipped.regions.limit", 0); int numOfSkippedRegions = skippedRegions.size(); if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) { throw new IOException(numOfSkippedRegions - + " region(s) could not be checked or repaired. See logs for detail."); + + " region(s) could not be checked or repaired. See logs for detail."); } if (shouldCheckHdfs()) { @@ -1859,25 +1796,25 @@ /** * Check consistency of all regions using mulitple threads concurrently. */ - private void checkRegionConsistencyConcurrently( - final List workItems) - throws IOException, KeeperException, InterruptedException { + private void + checkRegionConsistencyConcurrently(final List workItems) + throws IOException, KeeperException, InterruptedException { if (workItems.isEmpty()) { - return; // nothing to check + return; // nothing to check } List> workFutures = executor.invokeAll(workItems); - for(Future f: workFutures) { + for (Future f : workFutures) { try { f.get(); - } catch(ExecutionException e1) { - LOG.warn("Could not check region consistency " , e1.getCause()); + } catch (ExecutionException e1) { + LOG.warn("Could not check region consistency ", e1.getCause()); if (e1.getCause() instanceof IOException) { - throw (IOException)e1.getCause(); + throw (IOException) e1.getCause(); } else if (e1.getCause() instanceof KeeperException) { - throw (KeeperException)e1.getCause(); + throw (KeeperException) e1.getCause(); } else if (e1.getCause() instanceof InterruptedException) { - throw (InterruptedException)e1.getCause(); + throw (InterruptedException) e1.getCause(); } else { throw new IOException(e1.getCause()); } @@ -1901,8 +1838,9 @@ } catch (Exception e) { // If the region is non-META region, skip this region and send warning/error message; if // the region is META region, we should not continue. 
- LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString() - + "'.", e); + LOG.warn( + "Unable to complete check or repair the region '" + hbi.getRegionNameAsString() + "'.", + e); if (hbi.getHdfsHRI().isMetaRegion()) { throw e; } @@ -1923,9 +1861,7 @@ } /** - * Check and fix table states, assumes full info available: - * - tableInfos - * - empty tables loaded + * Check and fix table states, assumes full info available: - tableInfos - empty tables loaded */ private void checkAndFixTableStates() throws IOException { // first check dangling states @@ -1933,19 +1869,17 @@ TableName tableName = entry.getKey(); TableState tableState = entry.getValue(); TableInfo tableInfo = tablesInfo.get(tableName); - if (isTableIncluded(tableName) - && !tableName.isSystemTable() - && tableInfo == null) { + if (isTableIncluded(tableName) && !tableName.isSystemTable() && tableInfo == null) { if (fixMeta) { MetaTableAccessor.deleteTableState(connection, tableName); TableState state = MetaTableAccessor.getTableState(connection, tableName); if (state != null) { errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE, - tableName + " unable to delete dangling table state " + tableState); + tableName + " unable to delete dangling table state " + tableState); } } else { errors.reportError(ERROR_CODE.ORPHAN_TABLE_STATE, - tableName + " has dangling table state " + tableState); + tableName + " has dangling table state " + tableState); } } } @@ -1957,11 +1891,10 @@ TableState newState = MetaTableAccessor.getTableState(connection, tableName); if (newState == null) { errors.reportError(ERROR_CODE.NO_TABLE_STATE, - "Unable to change state for table " + tableName + " in meta "); + "Unable to change state for table " + tableName + " in meta "); } } else { - errors.reportError(ERROR_CODE.NO_TABLE_STATE, - tableName + " has no state in meta "); + errors.reportError(ERROR_CODE.NO_TABLE_STATE, tableName + " has no state in meta "); } } } @@ -1982,9 +1915,9 @@ FSUtils.checkAccess(ugi, file, FsAction.WRITE); } catch (AccessDeniedException ace) { LOG.warn("Got AccessDeniedException when preCheckPermission ", ace); - errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName() - + " does not have write perms to " + file.getPath() - + ". Please rerun hbck as hdfs user " + file.getOwner()); + errors.reportError(ERROR_CODE.WRONG_USAGE, + "Current user " + ugi.getUserName() + " does not have write perms to " + file.getPath() + + ". Please rerun hbck as hdfs user " + file.getOwner()); throw ace; } } @@ -2003,7 +1936,7 @@ private void deleteMetaRegion(byte[] metaKey) throws IOException { Delete d = new Delete(metaKey); meta.delete(d); - LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" ); + LOG.info("Deleted " + Bytes.toString(metaKey) + " from META"); } /** @@ -2023,23 +1956,22 @@ mutations.add(p); meta.mutateRow(mutations); - LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" ); + LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META"); } /** - * This backwards-compatibility wrapper for permanently offlining a region - * that should not be alive. If the region server does not support the - * "offline" method, it will use the closest unassign method instead. This - * will basically work until one attempts to disable or delete the affected - * table. The problem has to do with in-memory only master state, so - * restarting the HMaster or failing over to another should fix this. 
+ * This is a backwards-compatibility wrapper for permanently offlining a region that should not be + * alive. If the region server does not support the "offline" method, it will use the closest + * unassign method instead. This will basically work until one attempts to disable or delete the + * affected table. The problem has to do with in-memory only master state, so restarting the + * HMaster or failing over to another should fix this. */ private void offline(byte[] regionName) throws IOException { String regionString = Bytes.toStringBinary(regionName); if (!rsSupportsOffline) { - LOG.warn("Using unassign region " + regionString - + " instead of using offline method, you should" - + " restart HMaster after these repairs"); + LOG.warn( + "Using unassign region " + regionString + " instead of using offline method, you should" + + " restart HMaster after these repairs"); admin.unassign(regionName, true); return; } @@ -2049,12 +1981,12 @@ LOG.info("Offlining region " + regionString); admin.offline(regionName); } catch (IOException ioe) { - String notFoundMsg = "java.lang.NoSuchMethodException: " + - "org.apache.hadoop.hbase.master.HMaster.offline([B)"; + String notFoundMsg = "java.lang.NoSuchMethodException: " + + "org.apache.hadoop.hbase.master.HMaster.offline([B)"; if (ioe.getMessage().contains(notFoundMsg)) { - LOG.warn("Using unassign region " + regionString - + " instead of using offline method, you should" - + " restart HMaster after these repairs"); + LOG.warn( + "Using unassign region " + regionString + " instead of using offline method, you should" + + " restart HMaster after these repairs"); rsSupportsOffline = false; // in the future just use unassign admin.unassign(regionName, true); return; @@ -2072,13 +2004,13 @@ int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication(); for (int i = 1; i < numReplicas; i++) { if (hi.getPrimaryHRIForDeployedReplica() == null) continue; - HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica( - hi.getPrimaryHRIForDeployedReplica(), i); + HRegionInfo hri = + RegionReplicaUtil.getRegionInfoForReplica(hi.getPrimaryHRIForDeployedReplica(), i); HbckInfo h = regionInfoMap.get(hri.getEncodedName()); if (h != null) { undeployRegionsForHbi(h); - //set skip checks; we undeployed it, and we don't want to evaluate this anymore - //in consistency checks + // set skip checks; we undeployed it, and we don't want to evaluate this anymore + // in consistency checks h.setSkipChecks(true); } } @@ -2086,28 +2018,26 @@ private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException { for (OnlineEntry rse : hi.deployedEntries) { - LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa); + LOG.debug("Undeploy region " + rse.hri + " from " + rse.hsa); try { HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri); offline(rse.hri.getRegionName()); } catch (IOException ioe) { LOG.warn("Got exception when attempting to offline region " - + Bytes.toString(rse.hri.getRegionName()), ioe); + + Bytes.toString(rse.hri.getRegionName()), + ioe); } } } /** - * Attempts to undeploy a region from a region server based in information in - * META. Any operations that modify the file system should make sure that - * its corresponding region is not deployed to prevent data races. - * - * A separate call is required to update the master in-memory region state - * kept in the AssignementManager.
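Review note: offline() above is the capability-probe idiom -- try the newer master RPC once, and on the telltale NoSuchMethodException message flip rsSupportsOffline so every later call goes straight to unassign(). A stripped-down sketch of the same memoized fallback (the Admin calls are stubbed out; only the control flow is the point):

    import java.io.IOException;

    public class OfflineFallback {
      private boolean masterSupportsOffline = true; // assume yes until proven otherwise

      void offlineStub(String region) throws IOException {
        // stands in for admin.offline(); this "old master" never supports it
        throw new IOException("java.lang.NoSuchMethodException: HMaster.offline([B)");
      }

      void unassignStub(String region) { System.out.println("unassigned " + region); }

      void offline(String region) throws IOException {
        if (!masterSupportsOffline) { unassignStub(region); return; } // fast path after first probe
        try {
          offlineStub(region);
        } catch (IOException ioe) {
          if (ioe.getMessage() != null && ioe.getMessage().contains("NoSuchMethodException")) {
            masterSupportsOffline = false; // remember; future calls skip the probe
            unassignStub(region);
            return;
          }
          throw ioe; // a different failure: propagate
        }
      }

      public static void main(String[] args) throws IOException {
        OfflineFallback f = new OfflineFallback();
        f.offline("r1"); // probes the master, then falls back
        f.offline("r2"); // goes straight to unassign
      }
    }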
Because disable uses this state instead of - * that found in META, we can't seem to cleanly disable/delete tables that - * have been hbck fixed. When used on a version of HBase that does not have - * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master - * restart or failover may be required. + * Attempts to undeploy a region from a region server based on information in META. Any operations + * that modify the file system should make sure that its corresponding region is not deployed to + * prevent data races. A separate call is required to update the master in-memory region state + * kept in the AssignmentManager. Because disable uses this state instead of that found in META, + * we can't seem to cleanly disable/delete tables that have been hbck fixed. When used on a + * version of HBase that does not have the offline ipc call exposed on the master (<0.90.5, + * <0.92.0) a master restart or failover may be required. */ private void closeRegion(HbckInfo hi) throws IOException, InterruptedException { if (hi.metaEntry == null && hi.hdfsEntry == null) { @@ -2131,25 +2061,22 @@ Result r = meta.get(get); RegionLocations rl = MetaTableAccessor.getRegionLocations(r); if (rl == null) { - LOG.warn("Unable to close region " + hi.getRegionNameAsString() + - " since meta does not have handle to reach it"); + LOG.warn("Unable to close region " + hi.getRegionNameAsString() + + " since meta does not have handle to reach it"); return; } for (HRegionLocation h : rl.getRegionLocations()) { ServerName serverName = h.getServerName(); if (serverName == null) { - errors.reportError("Unable to close region " - + hi.getRegionNameAsString() + " because meta does not " - + "have handle to reach it."); + errors.reportError("Unable to close region " + hi.getRegionNameAsString() + + " because meta does not " + "have handle to reach it."); continue; } HRegionInfo hri = h.getRegionInfo(); if (hri == null) { LOG.warn("Unable to close region " + hi.getRegionNameAsString() - + " because hbase:meta had invalid or missing " - + HConstants.CATALOG_FAMILY_STR + ":" - + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) - + " qualifier value."); + + " because hbase:meta had invalid or missing " + HConstants.CATALOG_FAMILY_STR + ":" + + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) + " qualifier value."); continue; } // close the region -- close files and remove assignment @@ -2157,8 +2084,8 @@ } } - private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException, - KeeperException, InterruptedException { + private void tryAssignmentRepair(HbckInfo hbi, String msg) + throws IOException, KeeperException, InterruptedException { // If we are trying to fix the errors if (shouldFixAssignments()) { errors.print(msg); @@ -2179,8 +2106,8 @@ HbckInfo h = regionInfoMap.get(hri.getEncodedName()); if (h != null) { undeployRegions(h); - //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore - //in consistency checks + // set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore + // in consistency checks h.setSkipChecks(true); } HBaseFsckRepair.fixUnassigned(admin, hri); @@ -2194,7 +2121,7 @@ * Check a single region for consistency and correct deployment.
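Review note: the closeRegion() hunk above only acts once hbase:meta can actually name a server for the region; a null RegionLocations, a null ServerName, or a null HRegionInfo each short-circuits with a report instead of a blind close. A schematic sketch of that "resolve, validate every link, only then act" shape, with stub types standing in for the HBase ones (purely illustrative):

    import java.util.Optional;

    public class CloseGuard {
      static class Location {
        final String server;      // null if meta lacks a server entry
        final String regionInfo;  // null if the regioninfo qualifier is empty
        Location(String server, String regionInfo) {
          this.server = server;
          this.regionInfo = regionInfo;
        }
      }

      // Each missing link is reported and skipped, never treated as fatal.
      static void closeIfReachable(String region, Optional<Location> fromMeta) {
        if (!fromMeta.isPresent()) {
          System.out.println("WARN: cannot close " + region + ", meta has no handle to reach it");
          return;
        }
        Location loc = fromMeta.get();
        if (loc.server == null) {
          System.out.println("ERROR: cannot close " + region + ", no server listed in meta");
          return;
        }
        if (loc.regionInfo == null) {
          System.out.println("WARN: cannot close " + region + ", invalid or missing regioninfo");
          return;
        }
        System.out.println("closing " + region + " on " + loc.server);
      }

      public static void main(String[] args) {
        closeIfReachable("r1", Optional.of(new Location("rs1,16020,1", "hri")));
        closeIfReachable("r2", Optional.empty());
        closeIfReachable("r3", Optional.of(new Location(null, "hri")));
      }
    }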
*/ private void checkRegionConsistency(final String key, final HbckInfo hbi) - throws IOException, KeeperException, InterruptedException { + throws IOException, KeeperException, InterruptedException { if (hbi.isSkipChecks()) return; String descriptiveName = hbi.toString(); @@ -2204,14 +2131,12 @@ boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null; boolean isDeployed = !hbi.deployedOn.isEmpty(); boolean isMultiplyDeployed = hbi.deployedOn.size() > 1; - boolean deploymentMatchesMeta = - hasMetaAssignment && isDeployed && !isMultiplyDeployed && - hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0)); - boolean splitParent = - inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline(); + boolean deploymentMatchesMeta = hasMetaAssignment && isDeployed && !isMultiplyDeployed + && hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0)); + boolean splitParent = inMeta && hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline(); boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry.getTable()); - boolean recentlyModified = inHdfs && - hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime(); + boolean recentlyModified = + inHdfs && hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime(); // ========== First the healthy cases ============= if (hbi.containsOnlyHdfsEdits()) { @@ -2220,8 +2145,8 @@ if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) { return; } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) { - LOG.info("Region " + descriptiveName + " is in META, and in a disabled " + - "tabled that is not deployed"); + LOG.info("Region " + descriptiveName + " is in META, and in a disabled " + + "table that is not deployed"); return; } else if (recentlyModified) { LOG.warn("Region " + descriptiveName + " was recently modified -- skipping"); @@ -2232,9 +2157,9 @@ // We shouldn't have record of this region at all then!
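Review note: checkRegionConsistency() above derives its whole case analysis from a handful of booleans (inMeta, inHdfs, isDeployed, isMultiplyDeployed, deploymentMatchesMeta, splitParent, shouldBeDeployed, recentlyModified) and then walks an if/else ladder from the healthy case down to the pathological ones. A compressed sketch of the healthy gate and two of the error branches, with the flags passed in directly (purely illustrative; the real ladder has many more branches before its UNKNOWN fallback):

    public class ConsistencyLadder {
      static String classify(boolean inMeta, boolean inHdfs, boolean isDeployed,
          boolean isMultiplyDeployed, boolean deploymentMatchesMeta, boolean shouldBeDeployed) {
        // Healthy: meta, hdfs, and exactly one matching deployment all agree.
        if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
          return "OK";
        }
        if (!inMeta && !inHdfs && isDeployed) {
          return "NOT_IN_META_HDFS"; // deployed but unknown to both meta and hdfs
        }
        if (inMeta && inHdfs && isMultiplyDeployed) {
          return "MULTI_DEPLOYED"; // assigned to more than one region server
        }
        return "UNKNOWN"; // unforeseen combination of flags
      }

      public static void main(String[] args) {
        System.out.println(classify(true, true, true, false, true, true));     // OK
        System.out.println(classify(false, false, true, false, false, false)); // NOT_IN_META_HDFS
        System.out.println(classify(true, true, true, true, false, true));     // MULTI_DEPLOYED
      }
    }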
assert false : "Entry for region with no data"; } else if (!inMeta && !inHdfs && isDeployed) { - errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region " - + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " + - "deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, + "Region " + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " + + "deployed on " + Joiner.on(", ").join(hbi.deployedOn)); if (shouldFixAssignments()) { undeployRegions(hbi); } @@ -2248,15 +2173,13 @@ + " got merge recently, its file(s) will be cleaned by CatalogJanitor later"); return; } - errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " - + descriptiveName + " on HDFS, but not listed in hbase:meta " + - "or deployed on any region server"); + errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region " + descriptiveName + + " on HDFS, but not listed in hbase:meta " + "or deployed on any region server"); // restore region consistency of an adopted orphan if (shouldFixMeta()) { if (!hbi.isHdfsRegioninfoPresent()) { LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired" - + " in table integrity repair phase if -fixHdfsOrphans was" + - " used."); + + " in table integrity repair phase if -fixHdfsOrphans was" + " used."); return; } @@ -2265,10 +2188,10 @@ for (HRegionInfo region : tableInfo.getRegionsFromMeta()) { if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0 - && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(), - hri.getEndKey()) >= 0) + && (region.getEndKey().length == 0 + || Bytes.compareTo(region.getEndKey(), hri.getEndKey()) >= 0) && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) { - if(region.isSplit() || region.isOffline()) continue; + if (region.isSplit() || region.isOffline()) continue; Path regionDir = hbi.getHdfsRegionDir(); FileSystem fs = regionDir.getFileSystem(getConf()); List familyDirs = FSUtils.getFamilyDirs(fs, regionDir); @@ -2281,8 +2204,8 @@ LOG.warn(hri + " start and stop keys are in the range of " + region + ". The region might not be cleaned up from hdfs when region " + region + " split failed. 
Hence deleting from hdfs."); - HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, - regionDir.getParent(), hri); + HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs, regionDir.getParent(), + hri); return; } } @@ -2292,7 +2215,7 @@ LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI()); int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication(); HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(), - admin.getClusterStatus().getServers(), numReplicas); + admin.getClusterStatus().getServers(), numReplicas); tryAssignmentRepair(hbi, "Trying to reassign region..."); } @@ -2319,11 +2242,11 @@ LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI()); int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication(); HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(), - admin.getClusterStatus().getServers(), numReplicas); + admin.getClusterStatus().getServers(), numReplicas); tryAssignmentRepair(hbi, "Trying to fix unassigned region..."); } - // ========== Cases where the region is in hbase:meta ============= + // ========== Cases where the region is in hbase:meta ============= } else if (inMeta && inHdfs && !isDeployed && splitParent) { // check whether this is an actual error, or just transient state where parent // is not cleaned @@ -2337,26 +2260,25 @@ return; } } - errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region " - + descriptiveName + " is a split parent in META, in HDFS, " - + "and not deployed on any region server. This could be transient."); + errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, + "Region " + descriptiveName + " is a split parent in META, in HDFS, " + + "and not deployed on any region server. This could be transient."); if (shouldFixSplitParents()) { setShouldRerun(); resetSplitParent(hbi); } } else if (inMeta && !inHdfs && !isDeployed) { - errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " - + descriptiveName + " found in META, but not in HDFS " - + "or deployed on any region server."); + errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region " + descriptiveName + + " found in META, but not in HDFS " + "or deployed on any region server."); if (shouldFixMeta()) { deleteMetaRegion(hbi); } } else if (inMeta && !inHdfs && isDeployed) { - errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName - + " found in META, but not in HDFS, " + - "and deployed on " + Joiner.on(", ").join(hbi.deployedOn)); - // We treat HDFS as ground truth. Any information in meta is transient - // and equivalent data can be regenerated. So, lets unassign and remove + errors.reportError(ERROR_CODE.NOT_IN_HDFS, + "Region " + descriptiveName + " found in META, but not in HDFS, " + "and deployed on " + + Joiner.on(", ").join(hbi.deployedOn)); + // We treat HDFS as ground truth. Any information in meta is transient + // and equivalent data can be regenerated. So, lets unassign and remove // these problems from META. 
if (shouldFixAssignments()) { errors.print("Trying to fix unassigned region..."); @@ -2367,23 +2289,23 @@ deleteMetaRegion(hbi); } } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) { - errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName - + " not deployed on any region server."); + errors.reportError(ERROR_CODE.NOT_DEPLOYED, + "Region " + descriptiveName + " not deployed on any region server."); tryAssignmentRepair(hbi, "Trying to fix unassigned region..."); } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) { errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, - "Region " + descriptiveName + " should not be deployed according " + - "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn)); + "Region " + descriptiveName + " should not be deployed according " + + "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn)); if (shouldFixAssignments()) { errors.print("Trying to close the region " + descriptiveName); setShouldRerun(); HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn); } } else if (inMeta && inHdfs && isMultiplyDeployed) { - errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName - + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer - + " but is multiply assigned to region servers " + - Joiner.on(", ").join(hbi.deployedOn)); + errors.reportError(ERROR_CODE.MULTI_DEPLOYED, + "Region " + descriptiveName + " is listed in hbase:meta on region server " + + hbi.metaEntry.regionServer + " but is multiply assigned to region servers " + + Joiner.on(", ").join(hbi.deployedOn)); // If we are trying to fix the errors if (shouldFixAssignments()) { errors.print("Trying to fix assignment error..."); @@ -2391,10 +2313,9 @@ HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn); } } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) { - errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region " - + descriptiveName + " listed in hbase:meta on region server " + - hbi.metaEntry.regionServer + " but found on region server " + - hbi.deployedOn.get(0)); + errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, + "Region " + descriptiveName + " listed in hbase:meta on region server " + + hbi.metaEntry.regionServer + " but found on region server " + hbi.deployedOn.get(0)); // If we are trying to fix the errors if (shouldFixAssignments()) { errors.print("Trying to fix assignment error..."); @@ -2403,25 +2324,21 @@ HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI()); } } else { - errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName + - " is in an unforeseen state:" + - " inMeta=" + inMeta + - " inHdfs=" + inHdfs + - " isDeployed=" + isDeployed + - " isMultiplyDeployed=" + isMultiplyDeployed + - " deploymentMatchesMeta=" + deploymentMatchesMeta + - " shouldBeDeployed=" + shouldBeDeployed); + errors.reportError(ERROR_CODE.UNKNOWN, + "Region " + descriptiveName + " is in an unforeseen state:" + " inMeta=" + inMeta + + " inHdfs=" + inHdfs + " isDeployed=" + isDeployed + " isMultiplyDeployed=" + + isMultiplyDeployed + " deploymentMatchesMeta=" + deploymentMatchesMeta + + " shouldBeDeployed=" + shouldBeDeployed); } } /** - * Checks tables integrity. Goes over all regions and scans the tables. - * Collects all the pieces for each table and checks if there are missing, - * repeated or overlapping ones. + * Checks tables integrity. Goes over all regions and scans the tables. 
Collects all the pieces + * for each table and checks if there are missing, repeated or overlapping ones. * @throws IOException */ SortedMap checkIntegrity() throws IOException { - tablesInfo = new TreeMap (); + tablesInfo = new TreeMap(); LOG.debug("There are " + regionInfoMap.size() + " region info entries"); for (HbckInfo hbi : regionInfoMap.values()) { // Check only valid, working regions @@ -2451,8 +2368,8 @@ // Missing regionDir or over-deployment is checked elsewhere. Include // these cases in modTInfo, so we can evaluate those regions as part of // the region chain in META - //if (hbi.foundRegionDir == null) continue; - //if (hbi.deployedOn.size() != 1) continue; + // if (hbi.foundRegionDir == null) continue; + // if (hbi.deployedOn.size() != 1) continue; if (hbi.deployedOn.size() == 0) continue; // We should be safe here @@ -2484,8 +2401,9 @@ return tablesInfo; } - /** Loads table info's for tables that may not have been included, since there are no - * regions reported for the table, but table dir is there in hdfs + /** + * Loads table info's for tables that may not have been included, since there are no regions + * reported for the table, but table dir is there in hdfs */ private void loadTableInfosForTablesWithNoRegion() throws IOException { Map allTables = new FSTableDescriptors(getConf()).getAll(); @@ -2519,7 +2437,7 @@ try { dirs = fs.listStatus(contained.getHdfsRegionDir()); } catch (FileNotFoundException fnfe) { - // region we are attempting to merge in is not present! Since this is a merge, there is + // region we are attempting to merge in is not present! Since this is a merge, there is // no harm skipping this region if it does not exist. if (!fs.exists(contained.getHdfsRegionDir())) { LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir() @@ -2542,7 +2460,7 @@ for (FileStatus cf : dirs) { Path src = cf.getPath(); - Path dst = new Path(targetRegionDir, src.getName()); + Path dst = new Path(targetRegionDir, src.getName()); if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) { // do not copy the old .regioninfo file. @@ -2557,7 +2475,7 @@ LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst); // FileSystem.rename is inconsistent with directories -- if the // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir, - // it moves the src into the dst dir resulting in (foo/a/b). If + // it moves the src into the dst dir resulting in (foo/a/b). If // the dst does not exist, and the src a dir, src becomes dst. (foo/b) for (FileStatus hfile : fs.listStatus(src)) { boolean success = fs.rename(hfile.getPath(), dst); @@ -2571,13 +2489,12 @@ // if all success. sidelineRegionDir(fs, contained); - LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " + - getSidelineDir()); + LOG.info("[" + thread + "] Sidelined region dir " + contained.getHdfsRegionDir() + " into " + + getSidelineDir()); debugLsr(contained.getHdfsRegionDir()); return fileMoves; } - static class WorkItemOverlapMerge implements Callable { private TableIntegrityErrorHandler handler; @@ -2595,13 +2512,12 @@ } }; - /** * Maintain information about a particular table. */ public class TableInfo { TableName tableName; - TreeSet deployedOn; + TreeSet deployedOn; // backwards regions final List backwards = new ArrayList(); @@ -2612,30 +2528,30 @@ // region split calculator final RegionSplitCalculator sc = new RegionSplitCalculator(cmp); - // Histogram of different HTableDescriptors found. Ideally there is only one! 
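Review note on the checkIntegrity()/TableInfo hunks above: every valid region is funneled into a per-table TableInfo, and the interesting work happens afterwards, when each table's key ranges are examined for missing, repeated, or overlapping pieces. The core invariant is easy to state: sorted by start key, each region's start should equal the previous region's end. A toy scan over string keys (the real code feeds byte[] ranges through RegionSplitCalculator; names here are illustrative):

    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;

    public class ChainCheck {
      static class Range {
        final String start, end; // end == "" means end-of-table
        Range(String s, String e) { start = s; end = e; }
      }

      public static void main(String[] args) {
        List<Range> ranges = Arrays.asList(
            new Range("", "b"), new Range("b", "e"), new Range("g", "k"), new Range("j", ""));
        ranges.sort(Comparator.comparing(r -> r.start));
        String expected = ""; // the first region must start with the empty key
        for (Range r : ranges) {
          if (r.start.compareTo(expected) > 0) {
            System.out.println("HOLE_IN_REGION_CHAIN between " + expected + " and " + r.start);
          } else if (r.start.compareTo(expected) < 0) {
            System.out.println("OVERLAP_IN_REGION_CHAIN at " + r.start);
          }
          expected = r.end;
        }
        if (!expected.isEmpty()) System.out.println("LAST_REGION_ENDKEY_NOT_EMPTY: " + expected);
      }
    }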
+ // Histogram of different HTableDescriptors found. Ideally there is only one! final Set htds = new HashSet(); // key = start split, values = set of splits in problem group final Multimap overlapGroups = - TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp); + TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp); // list of regions derived from meta entries. private ImmutableList regionsFromMeta = null; TableInfo(TableName name) { this.tableName = name; - deployedOn = new TreeSet (); + deployedOn = new TreeSet(); } /** - * @return descriptor common to all regions. null if are none or multiple! + * @return descriptor common to all regions. null if there are none or multiple! */ private HTableDescriptor getHTD() { if (htds.size() == 1) { - return (HTableDescriptor)htds.toArray()[0]; + return (HTableDescriptor) htds.toArray()[0]; } else { - LOG.error("None/Multiple table descriptors found for table '" - + tableName + "' regions: " + htds); + LOG.error( + "None/Multiple table descriptors found for table '" + tableName + "' regions: " + htds); } return null; } @@ -2650,12 +2566,11 @@ // if not the absolute end key, check for cycle if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) { - errors.reportError( - ERROR_CODE.REGION_CYCLE, - String.format("The endkey for this region comes before the " - + "startkey, startkey=%s, endkey=%s", - Bytes.toStringBinary(hir.getStartKey()), - Bytes.toStringBinary(hir.getEndKey())), this, hir); + errors.reportError(ERROR_CODE.REGION_CYCLE, + String.format( + "The endkey for this region comes before the " + "startkey, startkey=%s, endkey=%s", + Bytes.toStringBinary(hir.getStartKey()), Bytes.toStringBinary(hir.getEndKey())), + this, hir); backwards.add(hir); return; } @@ -2703,76 +2618,69 @@ } @Override - public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{ + public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException { errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY, - "First region should start with an empty key. You need to " - + " create a new region and regioninfo in HDFS to plug the hole.", - getTableInfo(), hi); + "First region should start with an empty key. You need to " + + " create a new region and regioninfo in HDFS to plug the hole.", + getTableInfo(), hi); } @Override public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException { errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY, - "Last region should end with an empty key.
You need to " + + "create a new region and regioninfo in HDFS to plug the hole.", + getTableInfo()); } @Override - public void handleDegenerateRegion(HbckInfo hi) throws IOException{ - errors.reportError(ERROR_CODE.DEGENERATE_REGION, - "Region has the same start and end key.", getTableInfo(), hi); + public void handleDegenerateRegion(HbckInfo hi) throws IOException { + errors.reportError(ERROR_CODE.DEGENERATE_REGION, "Region has the same start and end key.", + getTableInfo(), hi); } @Override - public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{ + public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException { byte[] key = r1.getStartKey(); // dup start key errors.reportError(ERROR_CODE.DUPE_STARTKEYS, - "Multiple regions have the same startkey: " - + Bytes.toStringBinary(key), getTableInfo(), r1); + "Multiple regions have the same startkey: " + Bytes.toStringBinary(key), getTableInfo(), + r1); errors.reportError(ERROR_CODE.DUPE_STARTKEYS, - "Multiple regions have the same startkey: " - + Bytes.toStringBinary(key), getTableInfo(), r2); + "Multiple regions have the same startkey: " + Bytes.toStringBinary(key), getTableInfo(), + r2); } @Override - public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{ + public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException { errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN, - "There is an overlap in the region chain.", - getTableInfo(), hi1, hi2); + "There is an overlap in the region chain.", getTableInfo(), hi1, hi2); } @Override - public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{ - errors.reportError( - ERROR_CODE.HOLE_IN_REGION_CHAIN, - "There is a hole in the region chain between " - + Bytes.toStringBinary(holeStart) + " and " - + Bytes.toStringBinary(holeStop) - + ". You need to create a new .regioninfo and region " - + "dir in hdfs to plug the hole."); + public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException { + errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN, + "There is a hole in the region chain between " + Bytes.toStringBinary(holeStart) + " and " + + Bytes.toStringBinary(holeStop) + + ". You need to create a new .regioninfo and region " + + "dir in hdfs to plug the hole."); } }; /** - * This handler fixes integrity errors from hdfs information. There are - * basically three classes of integrity problems 1) holes, 2) overlaps, and - * 3) invalid regions. - * - * This class overrides methods that fix holes and the overlap group case. - * Individual cases of particular overlaps are handled by the general - * overlap group merge repair case. - * - * If hbase is online, this forces regions offline before doing merge - * operations. + * This handler fixes integrity errors from hdfs information. There are basically three classes + * of integrity problems 1) holes, 2) overlaps, and 3) invalid regions. This class overrides + * methods that fix holes and the overlap group case. Individual cases of particular overlaps + * are handled by the general overlap group merge repair case. If hbase is online, this forces + * regions offline before doing merge operations. 
*/ private class HDFSIntegrityFixer extends IntegrityFixSuggester { Configuration conf; boolean fixOverlaps = true; - HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf, - boolean fixHoles, boolean fixOverlaps) { + HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf, boolean fixHoles, + boolean fixOverlaps) { super(ti, errors); this.conf = conf; this.fixOverlaps = fixOverlaps; @@ -2780,80 +2688,73 @@ } /** - * This is a special case hole -- when the first region of a table is - * missing from META, HBase doesn't acknowledge the existance of the - * table. + * This is a special case hole -- when the first region of a table is missing from META, HBase + * doesn't acknowledge the existence of the table. */ @Override public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException { errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY, - "First region should start with an empty key. Creating a new " + - "region and regioninfo in HDFS to plug the hole.", - getTableInfo(), next); + "First region should start with an empty key. Creating a new " + + "region and regioninfo in HDFS to plug the hole.", + getTableInfo(), next); HTableDescriptor htd = getTableInfo().getHTD(); // from special EMPTY_START_ROW to next region's startKey - HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), - HConstants.EMPTY_START_ROW, next.getStartKey()); + HRegionInfo newRegion = + new HRegionInfo(htd.getTableName(), HConstants.EMPTY_START_ROW, next.getStartKey()); // TODO test HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); - LOG.info("Table region start key was not empty. Created new empty region: " - + newRegion + " " +region); + LOG.info("Table region start key was not empty. Created new empty region: " + newRegion + + " " + region); fixes++; } @Override public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException { errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY, - "Last region should end with an empty key. Creating a new " - + "region and regioninfo in HDFS to plug the hole.", getTableInfo()); + "Last region should end with an empty key. Creating a new " + + "region and regioninfo in HDFS to plug the hole.", + getTableInfo()); HTableDescriptor htd = getTableInfo().getHTD(); // from curEndKey to EMPTY_START_ROW - HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey, - HConstants.EMPTY_START_ROW); + HRegionInfo newRegion = + new HRegionInfo(htd.getTableName(), curEndKey, HConstants.EMPTY_START_ROW); HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); - LOG.info("Table region end key was not empty. Created new empty region: " + newRegion - + " " + region); + LOG.info("Table region end key was not empty. Created new empty region: " + newRegion + " " + + region); fixes++; } /** - * There is a hole in the hdfs regions that violates the table integrity - * rules. Create a new empty region that patches the hole. + * There is a hole in the hdfs regions that violates the table integrity rules. Create a new + * empty region that patches the hole. */ @Override - public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException { - errors.reportError( - ERROR_CODE.HOLE_IN_REGION_CHAIN, - "There is a hole in the region chain between " - + Bytes.toStringBinary(holeStartKey) + " and " - + Bytes.toStringBinary(holeStopKey) - + ".
Creating a new regioninfo and region " - + "dir in hdfs to plug the hole."); + public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) + throws IOException { + errors.reportError(ERROR_CODE.HOLE_IN_REGION_CHAIN, + "There is a hole in the region chain between " + Bytes.toStringBinary(holeStartKey) + + " and " + Bytes.toStringBinary(holeStopKey) + + ". Creating a new regioninfo and region " + "dir in hdfs to plug the hole."); HTableDescriptor htd = getTableInfo().getHTD(); HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey); HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); - LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region); + LOG.info("Plugged hole by creating new empty region: " + newRegion + " " + region); fixes++; } /** - * This takes set of overlapping regions and merges them into a single - * region. This covers cases like degenerate regions, shared start key, - * general overlaps, duplicate ranges, and partial overlapping regions. - * - * Cases: - * - Clean regions that overlap - * - Only .oldlogs regions (can't find start/stop range, or figure out) - * - * This is basically threadsafe, except for the fixer increment in mergeOverlaps. + * This takes set of overlapping regions and merges them into a single region. This covers + * cases like degenerate regions, shared start key, general overlaps, duplicate ranges, and + * partial overlapping regions. Cases: - Clean regions that overlap - Only .oldlogs regions + * (can't find start/stop range, or figure out) This is basically threadsafe, except for the + * fixer increment in mergeOverlaps. */ @Override - public void handleOverlapGroup(Collection overlap) - throws IOException { + public void handleOverlapGroup(Collection overlap) throws IOException { Preconditions.checkNotNull(overlap); - Preconditions.checkArgument(overlap.size() >0); + Preconditions.checkArgument(overlap.size() > 0); if (!this.fixOverlaps) { LOG.warn("Not attempting to repair overlaps."); @@ -2861,10 +2762,12 @@ } if (overlap.size() > maxMerge) { - LOG.warn("Overlap group has " + overlap.size() + " overlapping " + - "regions which is greater than " + maxMerge + ", the max number of regions to merge"); + LOG.warn("Overlap group has " + overlap.size() + " overlapping " + + "regions which is greater than " + maxMerge + + ", the max number of regions to merge"); if (sidelineBigOverlaps) { - // we only sideline big overlapped groups that exceeds the max number of regions to merge + // we only sideline big overlapped groups that exceeds the max number of regions to + // merge sidelineBigOverlaps(overlap); } return; @@ -2873,39 +2776,38 @@ mergeOverlaps(overlap); } - void mergeOverlaps(Collection overlap) - throws IOException { + void mergeOverlaps(Collection overlap) throws IOException { String thread = Thread.currentThread().getName(); - LOG.info("== [" + thread + "] Merging regions into one region: " - + Joiner.on(",").join(overlap)); + LOG.info( + "== [" + thread + "] Merging regions into one region: " + Joiner.on(",").join(overlap)); // get the min / max range and close all concerned regions Pair range = null; for (HbckInfo hi : overlap) { if (range == null) { range = new Pair(hi.getStartKey(), hi.getEndKey()); } else { - if (RegionSplitCalculator.BYTES_COMPARATOR - .compare(hi.getStartKey(), range.getFirst()) < 0) { + if (RegionSplitCalculator.BYTES_COMPARATOR.compare(hi.getStartKey(), + range.getFirst()) < 0) { range.setFirst(hi.getStartKey()); } - if 
(RegionSplitCalculator.BYTES_COMPARATOR - .compare(hi.getEndKey(), range.getSecond()) > 0) { + if (RegionSplitCalculator.BYTES_COMPARATOR.compare(hi.getEndKey(), + range.getSecond()) > 0) { range.setSecond(hi.getEndKey()); } } // need to close files so delete can happen. - LOG.debug("[" + thread + "] Closing region before moving data around: " + hi); + LOG.debug("[" + thread + "] Closing region before moving data around: " + hi); LOG.debug("[" + thread + "] Contained region dir before close"); debugLsr(hi.getHdfsRegionDir()); try { LOG.info("[" + thread + "] Closing region: " + hi); closeRegion(hi); } catch (IOException ioe) { - LOG.warn("[" + thread + "] Was unable to close region " + hi - + ". Just continuing... ", ioe); + LOG.warn("[" + thread + "] Was unable to close region " + hi + ". Just continuing... ", + ioe); } catch (InterruptedException e) { - LOG.warn("[" + thread + "] Was unable to close region " + hi - + ". Just continuing... ", e); + LOG.warn("[" + thread + "] Was unable to close region " + hi + ". Just continuing... ", + e); } try { @@ -2913,25 +2815,26 @@ offline(hi.getRegionName()); } catch (IOException ioe) { LOG.warn("[" + thread + "] Unable to offline region from master: " + hi - + ". Just continuing... ", ioe); + + ". Just continuing... ", + ioe); } } // create new empty container region. HTableDescriptor htd = getTableInfo().getHTD(); // from start key to end Key - HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(), - range.getSecond()); + HRegionInfo newRegion = + new HRegionInfo(htd.getTableName(), range.getFirst(), range.getSecond()); HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd); - LOG.info("[" + thread + "] Created new empty container region: " + - newRegion + " to contain regions: " + Joiner.on(",").join(overlap)); + LOG.info("[" + thread + "] Created new empty container region: " + newRegion + + " to contain regions: " + Joiner.on(",").join(overlap)); debugLsr(region.getRegionFileSystem().getRegionDir()); // all target regions are closed, should be able to safely cleanup. - boolean didFix= false; + boolean didFix = false; Path target = region.getRegionFileSystem().getRegionDir(); for (HbckInfo contained : overlap) { - LOG.info("[" + thread + "] Merging " + contained + " into " + target ); + LOG.info("[" + thread + "] Merging " + contained + " into " + target); int merges = mergeRegionDirs(target, contained); if (merges > 0) { didFix = true; @@ -2943,31 +2846,29 @@ } /** - * Sideline some regions in a big overlap group so that it - * will have fewer regions, and it is easier to merge them later on. - * + * Sideline some regions in a big overlap group so that it will have fewer regions, and it is + * easier to merge them later on. 
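Review note: mergeOverlaps() above folds an overlap group into one covering range by taking the minimum start key and maximum end key before creating the replacement region; each member is first closed, offlined, and then its files are moved by mergeRegionDirs(). With byte[] keys the fold must use the same unsigned comparator as the split calculator; a plain-strings sketch of just the fold (illustrative, not the HBase API):

    import java.util.Arrays;
    import java.util.List;

    public class CoveringRange {
      public static void main(String[] args) {
        // Each element is {startKey, endKey} of one region in the overlap group.
        List<String[]> overlap = Arrays.asList(
            new String[] {"b", "f"}, new String[] {"a", "c"}, new String[] {"e", "k"});
        String min = null, max = null;
        for (String[] r : overlap) {
          if (min == null || r[0].compareTo(min) < 0) min = r[0]; // smallest start key
          if (max == null || r[1].compareTo(max) > 0) max = r[1]; // largest end key
        }
        // The replacement region [a, k) covers every member of the group.
        System.out.println("new empty container region: [" + min + ", " + max + ")");
      }
    }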
* @param bigOverlap the overlapped group with regions more than maxMerge * @throws IOException */ - void sidelineBigOverlaps( - Collection bigOverlap) throws IOException { + void sidelineBigOverlaps(Collection bigOverlap) throws IOException { int overlapsToSideline = bigOverlap.size() - maxMerge; if (overlapsToSideline > maxOverlapsToSideline) { overlapsToSideline = maxOverlapsToSideline; } List regionsToSideline = - RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline); + RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline); FileSystem fs = FileSystem.get(conf); - for (HbckInfo regionToSideline: regionsToSideline) { + for (HbckInfo regionToSideline : regionsToSideline) { try { LOG.info("Closing region: " + regionToSideline); closeRegion(regionToSideline); } catch (IOException ioe) { - LOG.warn("Was unable to close region " + regionToSideline - + ". Just continuing... ", ioe); + LOG.warn("Was unable to close region " + regionToSideline + ". Just continuing... ", + ioe); } catch (InterruptedException e) { - LOG.warn("Was unable to close region " + regionToSideline - + ". Just continuing... ", e); + LOG.warn("Was unable to close region " + regionToSideline + ". Just continuing... ", + e); } try { @@ -2975,7 +2876,8 @@ offline(regionToSideline.getRegionName()); } catch (IOException ioe) { LOG.warn("Unable to offline region from master: " + regionToSideline - + ". Just continuing... ", ioe); + + ". Just continuing... ", + ioe); } LOG.info("Before sideline big overlapped region: " + regionToSideline.toString()); @@ -2983,8 +2885,7 @@ if (sidelineRegionDir != null) { sidelinedRegions.put(sidelineRegionDir, regionToSideline); LOG.info("After sidelined big overlapped region: " - + regionToSideline.getRegionNameAsString() - + " to " + sidelineRegionDir.toString()); + + regionToSideline.getRegionNameAsString() + " to " + sidelineRegionDir.toString()); fixes++; } } @@ -2992,8 +2893,8 @@ } /** - * Check the region chain (from META) of this table. We are looking for - * holes, overlaps, and cycles. + * Check the region chain (from META) of this table. We are looking for holes, overlaps, and + * cycles. * @return false if there are errors * @throws IOException */ @@ -3029,7 +2930,7 @@ // special endkey case converts '' to null byte[] endKey = rng.getEndKey(); endKey = (endKey.length == 0) ? 
null : endKey; - if (Bytes.equals(rng.getStartKey(),endKey)) { + if (Bytes.equals(rng.getStartKey(), endKey)) { handler.handleDegenerateRegion(rng); } } @@ -3052,14 +2953,14 @@ // record errors ArrayList subRange = new ArrayList(ranges); - // this dumb and n^2 but this shouldn't happen often + // this dumb and n^2 but this shouldn't happen often for (HbckInfo r1 : ranges) { if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue; subRange.remove(r1); for (HbckInfo r2 : subRange) { if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue; - if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) { - handler.handleDuplicateStartKeys(r1,r2); + if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey()) == 0) { + handler.handleDuplicateStartKeys(r1, r2); } else { // overlap handler.handleOverlapInRegionChain(r1, r2); @@ -3103,20 +3004,16 @@ if (details) { // do full region split map dump - errors.print("---- Table '" + this.tableName - + "': region split map"); + errors.print("---- Table '" + this.tableName + "': region split map"); dump(splits, regions); - errors.print("---- Table '" + this.tableName - + "': overlap groups"); + errors.print("---- Table '" + this.tableName + "': overlap groups"); dumpOverlapProblems(overlapGroups); - errors.print("There are " + overlapGroups.keySet().size() - + " overlap groups with " + overlapGroups.size() - + " overlapping regions"); + errors.print("There are " + overlapGroups.keySet().size() + " overlap groups with " + + overlapGroups.size() + " overlapping regions"); } if (!sidelinedRegions.isEmpty()) { LOG.warn("Sidelined big overlapped regions, please bulk load them!"); - errors.print("---- Table '" + this.tableName - + "': sidelined big overlapped regions"); + errors.print("---- Table '" + this.tableName + "': sidelined big overlapped regions"); dumpSidelinedRegions(sidelinedRegions); } return errors.getErrorList().size() == originalErrorsCount; @@ -3124,7 +3021,7 @@ private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey) throws IOException { - // we parallelize overlap handler for the case we have lots of groups to fix. We can + // we parallelize overlap handler for the case we have lots of groups to fix. We can // safely assume each group is independent. List merges = new ArrayList(overlapGroups.size()); List> rets; @@ -3138,12 +3035,12 @@ LOG.error("Overlap merges were interrupted", e); return false; } - for(int i=0; i f = rets.get(i); try { f.get(); - } catch(ExecutionException e) { + } catch (ExecutionException e) { LOG.warn("Failed to merge overlap group" + work, e.getCause()); } catch (InterruptedException e) { LOG.error("Waiting for overlap merges was interrupted", e); @@ -3155,7 +3052,6 @@ /** * This dumps data in a visually reasonable way for visual debugging - * * @param splits * @param regions */ @@ -3166,8 +3062,7 @@ sb.setLength(0); // clear out existing buffer, if any. 
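Review note: the comment the hunk above keeps ("this dumb and n^2 but this shouldn't happen often") is candid -- duplicate start keys are found by comparing every pair of ranges. Should a badly damaged table ever produce a large problem group, a sort-first pass finds the same duplicates in O(n log n). A sketch of that alternative (illustrative; the real items are HbckInfo entries with byte[] keys and replica filtering):

    import java.util.Arrays;

    public class DupeStartKeys {
      public static void main(String[] args) {
        String[] startKeys = {"c", "a", "c", "b", "a"};
        Arrays.sort(startKeys); // duplicates become adjacent
        for (int i = 1; i < startKeys.length; i++) {
          if (startKeys[i].equals(startKeys[i - 1])) {
            System.out.println("DUPE_STARTKEYS: multiple regions start at " + startKeys[i]);
          }
        }
      }
    }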
sb.append(Bytes.toStringBinary(k) + ":\t"); for (HbckInfo r : regions.get(k)) { - sb.append("[ "+ r.toString() + ", " - + Bytes.toStringBinary(r.getEndKey())+ "]\t"); + sb.append("[ " + r.toString() + ", " + Bytes.toStringBinary(r.getEndKey()) + "]\t"); } errors.print(sb.toString()); } @@ -3180,36 +3075,32 @@ for (byte[] k : regions.keySet()) { errors.print(Bytes.toStringBinary(k) + ":"); for (HbckInfo r : regions.get(k)) { - errors.print("[ " + r.toString() + ", " - + Bytes.toStringBinary(r.getEndKey()) + "]"); + errors.print("[ " + r.toString() + ", " + Bytes.toStringBinary(r.getEndKey()) + "]"); } errors.print("----"); } } public void dumpSidelinedRegions(Map regions) { - for (Map.Entry entry: regions.entrySet()) { + for (Map.Entry entry : regions.entrySet()) { TableName tableName = entry.getValue().getTableName(); Path path = entry.getKey(); - errors.print("This sidelined region dir should be bulk loaded: " - + path.toString()); + errors.print("This sidelined region dir should be bulk loaded: " + path.toString()); errors.print("Bulk load command looks like: " - + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles " - + path.toUri().getPath() + " "+ tableName); + + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles " + + path.toUri().getPath() + " " + tableName); } } - public Multimap getOverlapGroups( - TableName table) { + public Multimap getOverlapGroups(TableName table) { TableInfo ti = tablesInfo.get(table); return ti.overlapGroups; } /** - * Return a list of user-space table names whose metadata have not been - * modified in the last few milliseconds specified by timelag - * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER, - * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last + * Return a list of user-space table names whose metadata have not been modified in the last few + * milliseconds specified by timelag if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, + * STARTCODE_QUALIFIER, SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last * milliseconds specified by timelag, then the table is a candidate to be returned. * @return tables that have not been modified recently * @throws IOException if an error is encountered @@ -3236,7 +3127,7 @@ HTableDescriptor[] getHTableDescriptors(List tableNames) { HTableDescriptor[] htd = new HTableDescriptor[0]; - LOG.info("getHTableDescriptors == tableNames => " + tableNames); + LOG.info("getHTableDescriptors == tableNames => " + tableNames); try (Connection conn = ConnectionFactory.createConnection(getConf()); Admin admin = conn.getAdmin()) { htd = admin.getTableDescriptorsByTableName(tableNames); @@ -3247,9 +3138,8 @@ } /** - * Gets the entry in regionInfo corresponding to the the given encoded - * region name. If the region has not been seen yet, a new entry is added - * and returned. + * Gets the entry in regionInfo corresponding to the given encoded region name. If the region + * has not been seen yet, a new entry is added and returned. */ private synchronized HbckInfo getOrCreateInfo(String name) { HbckInfo hbi = regionInfoMap.get(name); @@ -3270,7 +3160,7 @@ } zkw.close(); } - + private void checkAndFixReplication() throws IOException { ZooKeeperWatcher zkw = createZooKeeperWatcher(); try { @@ -3287,14 +3177,13 @@ } /** - * Check values in regionInfo for hbase:meta - * Check if zero or more than one regions with hbase:meta are found. - * If there are inconsistencies (i.e.
zero or more than one regions - * pretend to be holding the hbase:meta) try to fix that and report an error. - * @throws IOException from HBaseFsckRepair functions - * @throws KeeperException - * @throws InterruptedException - */ + * Check values in regionInfo for hbase:meta Check if zero or more than one regions with + * hbase:meta are found. If there are inconsistencies (i.e. zero or more than one regions pretend + * to be holding the hbase:meta) try to fix that and report an error. + * @throws IOException from HBaseFsckRepair functions + * @throws KeeperException + * @throws InterruptedException + */ boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException { Map metaRegions = new HashMap(); for (HbckInfo value : regionInfoMap.values()) { @@ -3302,8 +3191,8 @@ metaRegions.put(value.getReplicaId(), value); } } - int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME) - .getRegionReplication(); + int metaReplication = + admin.getTableDescriptor(TableName.META_TABLE_NAME).getRegionReplication(); boolean noProblem = true; // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas // Check the deployed servers. It should be exactly one server for each replica. @@ -3318,12 +3207,11 @@ if (servers.size() == 0) { assignMetaReplica(i); } else if (servers.size() > 1) { - errors - .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " + - metaHbckInfo.getReplicaId() + " is found on more than one region."); + errors.reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " + + metaHbckInfo.getReplicaId() + " is found on more than one region."); if (shouldFixAssignments()) { - errors.print("Trying to fix a problem with hbase:meta, replicaId " + - metaHbckInfo.getReplicaId() +".."); + errors.print("Trying to fix a problem with hbase:meta, replicaId " + + metaHbckInfo.getReplicaId() + ".."); setShouldRerun(); // try fix it (treat is a dupe assignment) HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers); @@ -3335,11 +3223,11 @@ for (Map.Entry entry : metaRegions.entrySet()) { noProblem = false; errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, - "hbase:meta replicas are deployed in excess. Configured " + metaReplication + - ", deployed " + metaRegions.size()); + "hbase:meta replicas are deployed in excess. 
Configured " + metaReplication + ", deployed " + + metaRegions.size()); if (shouldFixAssignments()) { - errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() + - " of hbase:meta.."); + errors.print( + "Trying to undeploy excess replica, replicaId: " + entry.getKey() + " of hbase:meta.."); setShouldRerun(); unassignMetaReplica(entry.getValue()); } @@ -3349,8 +3237,8 @@ return noProblem; } - private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException, - KeeperException { + private void unassignMetaReplica(HbckInfo hi) + throws IOException, InterruptedException, KeeperException { undeployRegions(hi); ZooKeeperWatcher zkw = createZooKeeperWatcher(); try { @@ -3362,14 +3250,14 @@ private void assignMetaReplica(int replicaId) throws IOException, KeeperException, InterruptedException { - errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " + - replicaId +" is not found on any region."); + errors.reportError(ERROR_CODE.NO_META_REGION, + "hbase:meta, replicaId " + replicaId + " is not found on any region."); if (shouldFixAssignments()) { errors.print("Trying to fix a problem with hbase:meta.."); setShouldRerun(); // try to fix it (treat it as unassigned region) - HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( - HRegionInfo.FIRST_META_REGIONINFO, replicaId); + HRegionInfo h = + RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId); HBaseFsckRepair.fixUnassigned(admin, h); HBaseFsckRepair.waitUntilAssigned(admin, h); } @@ -3387,7 +3275,7 @@ final Comparator comp = new Comparator() { @Override public int compare(Cell k1, Cell k2) { - return (int)(k1.getTimestamp() - k2.getTimestamp()); + return (int) (k1.getTimestamp() - k2.getTimestamp()); } }; @@ -3396,7 +3284,7 @@ try { // record the latest modification of this META record - long ts = Collections.max(result.listCells(), comp).getTimestamp(); + long ts = Collections.max(result.listCells(), comp).getTimestamp(); RegionLocations rl = MetaTableAccessor.getRegionLocations(result); if (rl == null) { emptyRegionInfoQualifiers.add(result); @@ -3405,16 +3293,15 @@ return true; } ServerName sn = null; - if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null || - rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) { + if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null + || rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) { emptyRegionInfoQualifiers.add(result); errors.reportError(ERROR_CODE.EMPTY_META_CELL, "Empty REGIONINFO_QUALIFIER found in hbase:meta"); return true; } HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo(); - if (!(isTableIncluded(hri.getTable()) - || hri.isMetaRegion())) { + if (!(isTableIncluded(hri.getTable()) || hri.isMetaRegion())) { return true; } PairOfSameType daughters = MetaTableAccessor.getDaughterRegions(result); @@ -3441,8 +3328,8 @@ } } PairOfSameType mergeRegions = MetaTableAccessor.getMergeRegions(result); - for (HRegionInfo mergeRegion : new HRegionInfo[] { - mergeRegions.getFirst(), mergeRegions.getSecond() }) { + for (HRegionInfo mergeRegion : new HRegionInfo[] { mergeRegions.getFirst(), + mergeRegions.getSecond() }) { if (mergeRegion != null) { // This region is already been merged HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName()); @@ -3475,16 +3362,16 @@ * Stores the regioninfo entries scanned from META */ static class MetaEntry extends HRegionInfo { - ServerName regionServer; // 
server hosting this region - long modTime; // timestamp of most recent modification metadata - HRegionInfo splitA, splitB; //split daughters + ServerName regionServer; // server hosting this region + long modTime; // timestamp of most recent modification metadata + HRegionInfo splitA, splitB; // split daughters public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) { this(rinfo, regionServer, modTime, null, null); } - public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime, - HRegionInfo splitA, HRegionInfo splitB) { + public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime, HRegionInfo splitA, + HRegionInfo splitB) { super(rinfo); this.regionServer = regionServer; this.modTime = modTime; @@ -3528,7 +3415,7 @@ static class HdfsEntry { HRegionInfo hri; Path hdfsRegionDir = null; - long hdfsRegionDirModTime = 0; + long hdfsRegionDirModTime = 0; boolean hdfsRegioninfoFilePresent = false; boolean hdfsOnlyEdits = false; } @@ -3547,8 +3434,8 @@ } /** - * Maintain information about a particular region. It gathers information - * from three places -- HDFS, META, and region servers. + * Maintain information about a particular region. It gathers information from three places -- + * HDFS, META, and region servers. */ public static class HbckInfo implements KeyRange { private MetaEntry metaEntry = null; // info in META @@ -3565,29 +3452,28 @@ } public synchronized int getReplicaId() { - return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId; + return metaEntry != null ? metaEntry.getReplicaId() : deployedReplicaId; } public synchronized void addServer(HRegionInfo hri, ServerName server) { - OnlineEntry rse = new OnlineEntry() ; + OnlineEntry rse = new OnlineEntry(); rse.hri = hri; rse.hsa = server; this.deployedEntries.add(rse); this.deployedOn.add(server); // save the replicaId that we see deployed in the cluster this.deployedReplicaId = hri.getReplicaId(); - this.primaryHRIForDeployedReplica = - RegionReplicaUtil.getRegionInfoForDefaultReplica(hri); + this.primaryHRIForDeployedReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri); } @Override public synchronized String toString() { StringBuilder sb = new StringBuilder(); sb.append("{ meta => "); - sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null"); - sb.append( ", hdfs => " + getHdfsRegionDir()); - sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries)); - sb.append( ", replicaId => " + getReplicaId()); + sb.append((metaEntry != null) ? metaEntry.getRegionNameAsString() : "null"); + sb.append(", hdfs => " + getHdfsRegionDir()); + sb.append(", deployed => " + Joiner.on(", ").join(deployedEntries)); + sb.append(", replicaId => " + getReplicaId()); sb.append(" }"); return sb.toString(); } @@ -3732,8 +3618,8 @@ return tableCompare; } - int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare( - l.getStartKey(), r.getStartKey()); + int startComparison = + RegionSplitCalculator.BYTES_COMPARATOR.compare(l.getStartKey(), r.getStartKey()); if (startComparison != 0) { return startComparison; } @@ -3743,8 +3629,7 @@ endKey = (endKey.length == 0) ? null : endKey; byte[] endKey2 = l.getEndKey(); endKey2 = (endKey2.length == 0) ? 
null : endKey2; - int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare( - endKey2, endKey); + int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(endKey2, endKey); if (endComparison != 0) { return endComparison; @@ -3763,7 +3648,7 @@ return -1; } // both l.hdfsEntry and r.hdfsEntry must not be null. - return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId()); + return (int) (l.hdfsEntry.hri.getRegionId() - r.hdfsEntry.hri.getRegionId()); } }; @@ -3775,16 +3660,15 @@ int numOfSkippedRegions; errors.print("Summary:"); for (TableInfo tInfo : tablesInfo.values()) { - numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ? - skippedRegions.get(tInfo.getName()).size() : 0; + numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) + ? skippedRegions.get(tInfo.getName()).size() : 0; if (errors.tableHasErrors(tInfo)) { errors.print("Table " + tInfo.getName() + " is inconsistent."); - } else if (numOfSkippedRegions > 0){ - errors.print("Table " + tInfo.getName() + " is okay (with " - + numOfSkippedRegions + " skipped regions)."); - } - else { + } else if (numOfSkippedRegions > 0) { + errors.print("Table " + tInfo.getName() + " is okay (with " + numOfSkippedRegions + + " skipped regions)."); + } else { errors.print("Table " + tInfo.getName() + " is okay."); } errors.print(" Number of regions: " + tInfo.getNumRegions()); @@ -3792,7 +3676,7 @@ Set skippedRegionStrings = skippedRegions.get(tInfo.getName()); System.out.println(" Number of skipped regions: " + numOfSkippedRegions); System.out.println(" List of skipped regions:"); - for(String sr : skippedRegionStrings) { + for (String sr : skippedRegionStrings) { System.out.println(" " + sr); } } @@ -3805,9 +3689,9 @@ } } - static ErrorReporter getErrorReporter( - final Configuration conf) throws ClassNotFoundException { - Class reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class); + static ErrorReporter getErrorReporter(final Configuration conf) throws ClassNotFoundException { + Class reporter = + conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class); return ReflectionUtils.newInstance(reporter, conf); } @@ -3815,33 +3699,41 @@ enum ERROR_CODE { UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META, NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, - NOT_DEPLOYED, - MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE, + NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE, FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR, ORPHAN_TABLE_STATE, NO_TABLE_STATE, UNDELETED_REPLICATION_QUEUE } + void clear(); + void report(String message); + void reportError(String message); + void reportError(ERROR_CODE errorCode, String message); + void reportError(ERROR_CODE errorCode, String message, TableInfo table); + void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info); - void reportError( - ERROR_CODE errorCode, - String message, - TableInfo table, - HbckInfo info1, - HbckInfo info2 - ); + + void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, + 
HbckInfo info2); + int summarize(); + void detail(String details); + ArrayList<ERROR_CODE> getErrorList(); + void progress(); + void print(String message); + void resetErrors(); + boolean tableHasErrors(TableInfo table); } @@ -3886,7 +3778,7 @@ @Override public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table, - HbckInfo info) { + HbckInfo info) { errorTables.add(table); String reference = "(region " + info.getRegionNameAsString() + ")"; reportError(errorCode, reference + " " + message); @@ -3894,10 +3786,10 @@ @Override public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table, - HbckInfo info1, HbckInfo info2) { + HbckInfo info1, HbckInfo info2) { errorTables.add(table); - String reference = "(regions " + info1.getRegionNameAsString() - + " and " + info2.getRegionNameAsString() + ")"; + String reference = "(regions " + info1.getRegionNameAsString() + " and " + + info2.getRegionNameAsString() + ")"; reportError(errorCode, reference + " " + message); } @@ -3907,13 +3799,13 @@ } /** - * Report error information, but do not increment the error count. Intended for cases - * where the actual error would have been reported previously. + * Report error information, but do not increment the error count. Intended for cases where the + * actual error would have been reported previously. * @param message */ @Override public synchronized void report(String message) { - if (! summary) { + if (!summary) { System.out.println("ERROR: " + message); } showProgress = 0; @@ -3921,8 +3813,7 @@ @Override public synchronized int summarize() { - System.out.println(Integer.toString(errorCount) + - " inconsistencies detected."); + System.out.println(Integer.toString(errorCount) + " inconsistencies detected."); if (errorCount == 0) { System.out.println("Status: OK"); return 0; @@ -3982,8 +3873,7 @@ private ErrorReporter errors; private HConnection connection; - WorkItemRegion(HBaseFsck hbck, ServerName info, - ErrorReporter errors, HConnection connection) { + WorkItemRegion(HBaseFsck hbck, ServerName info, ErrorReporter errors, HConnection connection) { this.hbck = hbck; this.rsinfo = info; this.errors = errors; @@ -4001,25 +3891,24 @@ regions = filterRegions(regions); if (details) { - errors.detail("RegionServer: " + rsinfo.getServerName() + - " number of regions: " + regions.size()); - for (HRegionInfo rinfo: regions) { - errors.detail(" " + rinfo.getRegionNameAsString() + - " id: " + rinfo.getRegionId() + - " encoded_name: " + rinfo.getEncodedName() + - " start: " + Bytes.toStringBinary(rinfo.getStartKey()) + - " end: " + Bytes.toStringBinary(rinfo.getEndKey())); + errors.detail( + "RegionServer: " + rsinfo.getServerName() + " number of regions: " + regions.size()); + for (HRegionInfo rinfo : regions) { + errors.detail(" " + rinfo.getRegionNameAsString() + " id: " + rinfo.getRegionId() + + " encoded_name: " + rinfo.getEncodedName() + " start: " + + Bytes.toStringBinary(rinfo.getStartKey()) + " end: " + + Bytes.toStringBinary(rinfo.getEndKey())); } } // check to see if the existence of this region matches the region in META - for (HRegionInfo r:regions) { + for (HRegionInfo r : regions) { HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName()); hbi.addServer(r, rsinfo); } - } catch (IOException e) { // unable to connect to the region server. - errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() + - " Unable to fetch region information. " + e); + } catch (IOException e) { // unable to connect to the region server. 
+ errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, + "RegionServer: " + rsinfo.getServerName() + " Unable to fetch region information. " + e); throw e; } return null; @@ -4028,8 +3917,7 @@ private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) { List<HRegionInfo> ret = Lists.newArrayList(); for (HRegionInfo hri : regions) { - if (hri.isMetaTable() || (!hbck.checkMetaOnly - && hbck.isTableIncluded(hri.getTable()))) { + if (hri.isMetaTable() || (!hbck.checkMetaOnly && hbck.isTableIncluded(hri.getTable()))) { ret.add(hri); } } @@ -4038,8 +3926,7 @@ } /** - * Contact hdfs and get all information about specified table directory into - * regioninfo list. + * Contact hdfs and get all information about specified table directory into regioninfo list. */ static class WorkItemHdfsDir implements Callable<Void> { private HBaseFsck hbck; @@ -4047,8 +3934,7 @@ private ErrorReporter errors; private FileSystem fs; - WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, - FileStatus status) { + WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors, FileStatus status) { this.hbck = hbck; this.fs = fs; this.tableDir = status; @@ -4068,13 +3954,12 @@ continue; } - LOG.debug("Loading region info from hdfs:"+ regionDir.getPath()); + LOG.debug("Loading region info from hdfs:" + regionDir.getPath()); HbckInfo hbi = hbck.getOrCreateInfo(encodedName); HdfsEntry he = new HdfsEntry(); synchronized (hbi) { if (hbi.getHdfsRegionDir() != null) { - errors.print("Directory " + encodedName + " duplicate??" + - hbi.getHdfsRegionDir()); + errors.print("Directory " + encodedName + " duplicate??" + hbi.getHdfsRegionDir()); } he.hdfsRegionDir = regionDir.getPath(); @@ -4102,8 +3987,7 @@ } catch (IOException e) { // unable to connect to the region server. errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: " - + tableDir.getPath().getName() - + " Unable to fetch region information. " + e); + + tableDir.getPath().getName() + " Unable to fetch region information. " + e); throw e; } return null; @@ -4111,8 +3995,7 @@ } /** - * Contact hdfs and get all information about specified table directory into - * regioninfo list. + * Contact hdfs and get all information about specified table directory into regioninfo list. */ static class WorkItemHdfsRegionInfo implements Callable<Void> { private HbckInfo hbi; @@ -4134,8 +4017,7 @@ hbck.loadHdfsRegioninfo(hbi); } catch (IOException ioe) { String msg = "Orphan region in HDFS: Unable to load .regioninfo from table " + hbi.getTableName() + " in hdfs dir " - + hbi.getHdfsRegionDir() + + hbi.getTableName() + " in hdfs dir " + hbi.getHdfsRegionDir() + "! It may be an invalid format or version file. Treating as " + "an orphaned regiondir."; errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg); @@ -4154,8 +4036,8 @@ }; /** - * Display the full report from fsck. This displays all live and dead region - * servers, and all known regions. + * Display the full report from fsck. This displays all live and dead region servers, and all + * known regions. */ public static void setDisplayFullReport() { details = true; @@ -4183,25 +4065,22 @@ } /** - * The balancer should be disabled if we are modifying HBase. - * It can be disabled if you want to prevent region movement from causing - * false positives. + * The balancer should be disabled if we are modifying HBase. It can be disabled if you want to + * prevent region movement from causing false positives. */ public boolean shouldDisableBalancer() { return fixAny || disableBalancer; } /** - * Set summary mode. 
- * Print only summary of the tables and status (OK or INCONSISTENT) + * Set summary mode. Print only summary of the tables and status (OK or INCONSISTENT) */ static void setSummary() { summary = true; } /** - * Set hbase:meta check mode. - * Print only info about hbase:meta table deployment/state + * Set hbase:meta check mode. Print only info about hbase:meta table deployment/state */ void setCheckMetaOnly() { checkMetaOnly = true; @@ -4215,14 +4094,13 @@ } /** - * Set table locks fix mode. - * Delete table locks held for a long time + * Set table locks fix mode. Delete table locks held for a long time */ public void setFixTableLocks(boolean shouldFix) { fixTableLocks = shouldFix; fixAny |= shouldFix; } - + /** * Set replication fix mode. */ @@ -4232,9 +4110,8 @@ } /** - * Check if we should rerun fsck again. This checks if we've tried to - * fix something and we should rerun fsck tool again. - * Display the full report from fsck. This displays all live and dead + * Check if we should rerun fsck again. This checks if we've tried to fix something and we should + * rerun fsck tool again. Display the full report from fsck. This displays all live and dead * region servers, and all known regions. */ void setShouldRerun() { @@ -4246,8 +4123,8 @@ } /** - * Fix inconsistencies found by fsck. This should try to fix errors (if any) - * found by fsck utility. + * Fix inconsistencies found by fsck. This should try to fix errors (if any) found by fsck + * utility. */ public void setFixAssignments(boolean shouldFix) { fixAssignments = shouldFix; @@ -4383,8 +4260,7 @@ } /** - * Only check/fix tables specified by the list, - * Empty list means all tables are included. + * Only check/fix tables specified by the list. Empty list means all tables are included. */ boolean isTableIncluded(TableName table) { return (tablesIncluded.size() == 0) || tablesIncluded.contains(table); @@ -4399,8 +4275,8 @@ } /** - * We are interested in only those tables that have not changed their state in - * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag + * We are interested in only those tables that have not changed their state in hbase:meta during + * the last few seconds specified by hbase.admin.fsck.timelag * @param seconds - the time in seconds */ public void setTimeLag(long seconds) { @@ -4408,14 +4284,14 @@ } /** - * * @param sidelineDir - HDFS path to sideline data */ public void setSidelineDir(String sidelineDir) { this.sidelineDir = new Path(sidelineDir); } - protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException { + protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) + throws IOException { return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles); } @@ -4442,33 +4318,41 @@ out.println(" where [opts] are:"); out.println(" -help Display help options (this)"); out.println(" -details Display full report of all regions."); - out.println(" -timelag <timelag> Process only regions that " + - " have not experienced any metadata updates in the last " + - " <timelag> seconds."); - out.println(" -sleepBeforeRerun <timeToSleep> Sleep this many seconds" + - " before checking if the fix worked if run with -fix"); + out.println(" -timelag <timelag> Process only regions that " + + " have not experienced any metadata updates in the last " + " <timelag> seconds."); + out.println(" -sleepBeforeRerun <timeToSleep> Sleep this many seconds" + + " before checking if the fix worked if run with -fix"); out.println(" -summary Print only summary of the tables and 
status."); out.println(" -metaonly Only check the state of the hbase:meta table."); out.println(" -sidelineDir HDFS path to backup existing meta."); - out.println(" -boundaries Verify that regions boundaries are the same between META and store files."); + out.println( + " -boundaries Verify that regions boundaries are the same between META and store files."); out.println(" -exclusive Abort if another hbck is exclusive or fixing."); out.println(" -disableBalancer Disable the load balancer."); out.println(""); out.println(" Metadata Repair options: (expert features, use with caution!)"); - out.println(" -fix Try to fix region assignments. This is for backwards compatiblity"); + out.println( + " -fix Try to fix region assignments. This is for backwards compatiblity"); out.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix"); - out.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good."); + out.println( + " -fixMeta Try to fix meta problems. This assumes HDFS region info is good."); out.println(" -noHdfsChecking Don't load/check region info from HDFS." + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap"); out.println(" -fixHdfsHoles Try to fix region holes in hdfs."); out.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs"); - out.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)"); + out.println( + " -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)"); out.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs."); out.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs."); - out.println(" -maxMerge When fixing region overlaps, allow at most regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)"); - out.println(" -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps"); - out.println(" -maxOverlapsToSideline When fixing region overlaps, allow at most regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)"); + out.println( + " -maxMerge When fixing region overlaps, allow at most regions to merge. (n=" + + DEFAULT_MAX_MERGE + " by default)"); + out.println( + " -sidelineBigOverlaps When fixing region overlaps, allow to sideline big overlaps"); + out.println( + " -maxOverlapsToSideline When fixing region overlaps, allow at most regions to sideline per group. (n=" + + DEFAULT_OVERLAPS_TO_SIDELINE + " by default)"); out.println(" -fixSplitParents Try to force offline split parents to be online."); out.println(" -ignorePreCheckPermission ignore filesystem permission pre-check"); out.println(" -fixReferenceFiles Try to offline lingering reference store files"); @@ -4477,23 +4361,26 @@ out.println(""); out.println(" Datafile Repair options: (expert features, use with caution!)"); - out.println(" -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid"); - out.println(" -sidelineCorruptHFiles Quarantine corrupted HFiles. implies -checkCorruptHFiles"); + out.println( + " -checkCorruptHFiles Check all Hfiles by opening them to make sure they are valid"); + out.println( + " -sidelineCorruptHFiles Quarantine corrupted HFiles. 
implies -checkCorruptHFiles"); out.println(""); out.println(" Metadata Repair shortcuts"); - out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " + - "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks"); + out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " + + "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks"); out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles"); out.println(""); out.println(" Table lock options"); - out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)"); + out.println( + " -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)"); out.println(""); out.println(" Replication options"); out.println(" -fixReplication Deletes replication queues for removed peers"); - + out.flush(); errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString()); @@ -4503,7 +4390,6 @@ /** * Main program - * * @param args * @throws Exception */ @@ -4521,7 +4407,10 @@ * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line. */ static class HBaseFsckTool extends Configured implements Tool { - HBaseFsckTool(Configuration conf) { super(conf); } + HBaseFsckTool(Configuration conf) { + super(conf); + } + @Override public int run(String[] args) throws Exception { HBaseFsck hbck = new HBaseFsck(getConf()); @@ -4531,9 +4420,8 @@ } }; - - public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException, - ServiceException, InterruptedException { + public HBaseFsck exec(ExecutorService exec, String[] args) + throws KeeperException, IOException, ServiceException, InterruptedException { long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN; boolean checkCorruptHFiles = false; @@ -4556,7 +4444,7 @@ return printUsageAndExit(); } try { - long timelag = Long.parseLong(args[i+1]); + long timelag = Long.parseLong(args[i + 1]); setTimeLag(timelag); } catch (NumberFormatException e) { errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value."); @@ -4565,12 +4453,11 @@ i++; } else if (cmd.equals("-sleepBeforeRerun")) { if (i == args.length - 1) { - errors.reportError(ERROR_CODE.WRONG_USAGE, - "HBaseFsck: -sleepBeforeRerun needs a value."); + errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sleepBeforeRerun needs a value."); return printUsageAndExit(); } try { - sleepBeforeRerun = Long.parseLong(args[i+1]); + sleepBeforeRerun = Long.parseLong(args[i + 1]); } catch (NumberFormatException e) { errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value."); return printUsageAndExit(); @@ -4648,7 +4535,7 @@ return printUsageAndExit(); } try { - int maxOverlapsToSideline = Integer.parseInt(args[i+1]); + int maxOverlapsToSideline = Integer.parseInt(args[i + 1]); setMaxOverlapsToSideline(maxOverlapsToSideline); } catch (NumberFormatException e) { errors.reportError(ERROR_CODE.WRONG_USAGE, @@ -4658,16 +4545,14 @@ i++; } else if (cmd.equals("-maxMerge")) { if (i == args.length - 1) { - errors.reportError(ERROR_CODE.WRONG_USAGE, - "-maxMerge needs a numeric value argument."); + errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument."); return printUsageAndExit(); } try { - int maxMerge = Integer.parseInt(args[i+1]); + int maxMerge = Integer.parseInt(args[i + 1]); setMaxMerge(maxMerge); } catch 
(NumberFormatException e) { - errors.reportError(ERROR_CODE.WRONG_USAGE, - "-maxMerge needs a numeric value argument."); + errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument."); return printUsageAndExit(); } i++; @@ -4766,16 +4651,14 @@ /** * ls -r for debugging purposes */ - public static void debugLsr(Configuration conf, - Path p) throws IOException { + public static void debugLsr(Configuration conf, Path p) throws IOException { debugLsr(conf, p, new PrintingErrorReporter()); } /** * ls -r for debugging purposes */ - public static void debugLsr(Configuration conf, - Path p, ErrorReporter errors) throws IOException { + public static void debugLsr(Configuration conf, Path p, ErrorReporter errors) throws IOException { if (!LOG.isDebugEnabled() || p == null) { return; } @@ -4792,7 +4675,7 @@ } if (fs.getFileStatus(p).isDirectory()) { - FileStatus[] fss= fs.listStatus(p); + FileStatus[] fss = fs.listStatus(p); for (FileStatus status : fss) { debugLsr(conf, status.getPath(), errors); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java index 21935f3..7048290 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsckOneRS.java @@ -1,23 +1,42 @@ /** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable + * law or agreed to in writing, software distributed under the License is distributed on an "AS IS" + * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License + * for the specific language governing permissions and limitations under the License. 
*/ package org.apache.hadoop.hbase.util; + +import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors; +import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors; +import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -51,8 +70,8 @@ import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; -import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl; import org.apache.hadoop.hbase.regionserver.SplitTransactionFactory; +import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl; import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction; import org.apache.hadoop.hbase.replication.ReplicationFactory; import org.apache.hadoop.hbase.replication.ReplicationQueues; @@ -69,33 +88,13 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - -import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.*; -import static org.junit.Assert.*; - -@Category({MiscTests.class, LargeTests.class}) +@Category({ MiscTests.class, LargeTests.class }) public class TestHBaseFsckOneRS extends BaseTestHBaseFsck { @BeforeClass public static void setUpBeforeClass() throws Exception { TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, - MasterSyncObserver.class.getName()); + MasterSyncObserver.class.getName()); conf.setInt("hbase.regionserver.handler.count", 2); conf.setInt("hbase.regionserver.metahandler.count", 30); @@ -138,11 +137,10 @@ EnvironmentEdgeManager.reset(); } - /** * This creates a clean table and confirms that the table is clean. 
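* (setupTable and countRows come from the BaseTestHBaseFsck base class this test extends: setupTable
* creates the pre-split fixture table and loads the ROWKEYS rows, and countRows checks that they are
* all readable again afterwards.)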
*/ - @Test(timeout=180000) + @Test(timeout = 180000) public void testHBaseFsckClean() throws Exception { assertNoErrors(doFsck(conf, false)); TableName table = TableName.valueOf("tableClean"); @@ -166,10 +164,9 @@ /** * Test thread pooling in the case where there are more regions than threads */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testHbckThreadpooling() throws Exception { - TableName table = - TableName.valueOf("tableDupeStartKey"); + TableName table = TableName.valueOf("tableDupeStartKey"); try { // Create table with 4 regions setupTable(table); @@ -185,12 +182,11 @@ } } - @Test (timeout=180000) + @Test(timeout = 180000) public void testTableWithNoRegions() throws Exception { // We might end up with empty regions in a table // see also testNoHdfsTable() - TableName table = - TableName.valueOf(name.getMethodName()); + TableName table = TableName.valueOf(name.getMethodName()); try { // create table with one region HTableDescriptor desc = new HTableDescriptor(table); @@ -201,10 +197,11 @@ // Mess it up by leaving a hole in the assignment, meta, and hdfs data deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, - HConstants.EMPTY_END_ROW, false, false, true); + HConstants.EMPTY_END_ROW, false, false, true); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); doFsck(conf, true); @@ -218,7 +215,7 @@ } } - @Test (timeout=180000) + @Test(timeout = 180000) public void testHbckFixOrphanTable() throws Exception { TableName table = TableName.valueOf("tableInfo"); FileSystem fs = null; @@ -226,16 +223,16 @@ try { setupTable(table); - Path hbaseTableDir = FSUtils.getTableDir( - FSUtils.getRootDir(conf), table); + Path hbaseTableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table); fs = hbaseTableDir.getFileSystem(conf); FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); tableinfo = status.getPath(); fs.rename(tableinfo, new Path("/.tableinfo")); - //to report error if .tableinfo is missing. + // to report error if .tableinfo is missing. 
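// The pattern used by nearly every test in this class: a read-only pass to detect
// (doFsck(conf, false)), an assertion on the exact ERROR_CODEs raised, a repair pass
// (doFsck(conf, true)), and a final read-only pass that must come back clean.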
HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLEINFO_FILE }); // fix OrphanTable with default .tableinfo (htd not yet cached on master) hbck = doFsck(conf, true); @@ -267,30 +264,29 @@ } } - @Test (timeout=180000) + @Test(timeout = 180000) public void testReadOnlyProperty() throws Exception { HBaseFsck hbck = doFsck(conf, false); Assert.assertEquals("shouldIgnorePreCheckPermission", true, - hbck.shouldIgnorePreCheckPermission()); + hbck.shouldIgnorePreCheckPermission()); hbck = doFsck(conf, true); Assert.assertEquals("shouldIgnorePreCheckPermission", false, - hbck.shouldIgnorePreCheckPermission()); + hbck.shouldIgnorePreCheckPermission()); hbck = doFsck(conf, true); hbck.setIgnorePreCheckPermission(true); Assert.assertEquals("shouldIgnorePreCheckPermission", true, - hbck.shouldIgnorePreCheckPermission()); + hbck.shouldIgnorePreCheckPermission()); } /** - * This creates and fixes a bad table where a region is completely contained - * by another region, and there is a hole (sort of like a bad split) + * This creates and fixes a bad table where a region is completely contained by another region, + * and there is a hole (sort of like a bad split) */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testOverlapAndOrphan() throws Exception { - TableName table = - TableName.valueOf("tableOverlapAndOrphan"); + TableName table = TableName.valueOf("tableOverlapAndOrphan"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); @@ -298,27 +294,28 @@ // Mess it up by creating an overlap in the metadata admin.disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true, - true, false, true, HRegionInfo.DEFAULT_REPLICA_ID); + true, false, true, HRegionInfo.DEFAULT_REPLICA_ID); admin.enableTable(table); HRegionInfo hriOverlap = createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap); - TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() - .waitForAssignment(hriOverlap); + TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap); ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, - new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); // fix the problem. doFsck(conf, true); // verify that overlaps are fixed - HBaseFsck hbck2 = doFsck(conf,false); + HBaseFsck hbck2 = doFsck(conf, false); assertNoErrors(hbck2); assertEquals(0, hbck2.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); @@ -328,14 +325,12 @@ } /** - * This creates and fixes a bad table where a region overlaps two regions -- - * a start key contained in another region and its end key is contained in - * yet another region. 
+ * This creates and fixes a bad table where a region overlaps two regions -- a start key contained + * in another region and its end key is contained in yet another region. */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testCoveredStartKey() throws Exception { - TableName table = - TableName.valueOf("tableCoveredStartKey"); + TableName table = TableName.valueOf("tableCoveredStartKey"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); @@ -344,14 +339,15 @@ HRegionInfo hriOverlap = createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2")); TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap); - TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() - .waitForAssignment(hriOverlap); + TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap); ServerName server = regionStates.getRegionServerOfRegion(hriOverlap); TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN, - HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN, + HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN }); assertEquals(3, hbck.getOverlapGroups(table).size()); assertEquals(ROWKEYS.length, countRows()); @@ -369,13 +365,12 @@ } /** - * This creates and fixes a bad table with a missing region -- hole in meta - * and data missing in the fs. + * This creates and fixes a bad table with a missing region -- hole in meta and data missing in + * the fs. */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testRegionHole() throws Exception { - TableName table = - TableName.valueOf("tableRegionHole"); + TableName table = TableName.valueOf("tableRegionHole"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); @@ -383,11 +378,12 @@ // Mess it up by leaving a hole in the assignment, meta, and hdfs data admin.disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, - true, true); + true, true); admin.enableTable(table); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); // holes are separate from overlap groups assertEquals(0, hbck.getOverlapGroups(table).size()); @@ -395,7 +391,7 @@ doFsck(conf, true); // check that hole fixed - assertNoErrors(doFsck(conf,false)); + assertNoErrors(doFsck(conf, false)); assertEquals(ROWKEYS.length - 2, countRows()); // lost a region so lost a row } finally { cleanupTable(table); @@ -405,18 +401,16 @@ /** * The region is not deployed when the table is disabled. 
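* hbck reports a region that is still open on a region server while its table is disabled as
* SHOULD_NOT_BE_DEPLOYED, and the repair pass closes it.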
*/ - @Test (timeout=180000) + @Test(timeout = 180000) public void testRegionShouldNotBeDeployed() throws Exception { - TableName table = - TableName.valueOf("tableRegionShouldNotBeDeployed"); + TableName table = TableName.valueOf("tableRegionShouldNotBeDeployed"); try { LOG.info("Starting testRegionShouldNotBeDeployed."); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); assertTrue(cluster.waitForActiveAndReadyMaster()); - - byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"), - Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") }; + byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"), + Bytes.toBytes("ccc"), Bytes.toBytes("ddd") }; HTableDescriptor htdDisabled = new HTableDescriptor(table); htdDisabled.addFamily(new HColumnDescriptor(FAM)); @@ -445,12 +439,12 @@ // If going through AM/ZK, the region won't be open. // Even it is opened, AM will close it which causes // flakiness of this test. - HRegion r = HRegion.openHRegion( - region, htdDisabled, hrs.getWAL(region), conf); + HRegion r = HRegion.openHRegion(region, htdDisabled, hrs.getWAL(region), conf); hrs.addToOnlineRegions(r); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.SHOULD_NOT_BE_DEPLOYED }); // fix this fault doFsck(conf, true); @@ -465,26 +459,26 @@ /** * This test makes sure that parallel instances of Hbck is disabled. - * * @throws Exception */ - @Test(timeout=180000) + @Test(timeout = 180000) public void testParallelHbck() throws Exception { final ExecutorService service; - final Future<HBaseFsck> hbck1,hbck2; + final Future<HBaseFsck> hbck1, hbck2; class RunHbck implements Callable<HBaseFsck> { boolean fail = true; + @Override - public HBaseFsck call(){ + public HBaseFsck call() { Configuration c = new Configuration(conf); c.setInt("hbase.hbck.lockfile.attempts", 1); // HBASE-13574 found that in HADOOP-2.6 and later, the create file would internally retry. // To avoid flakiness of the test, set low max wait time. c.setInt("hbase.hbck.lockfile.maxwaittime", 3); - try{ + try { return doFsck(c, true); // Exclusive hbck only when fixing - } catch(Exception e){ + } catch (Exception e) { if (e.getMessage().contains("Duplicate hbck")) { fail = false; } @@ -498,43 +492,42 @@ hbck1 = service.submit(new RunHbck()); hbck2 = service.submit(new RunHbck()); service.shutdown(); - //wait for 15 seconds, for both hbck calls finish + // wait for 15 seconds, for both hbck calls finish service.awaitTermination(15, TimeUnit.SECONDS); HBaseFsck h1 = hbck1.get(); HBaseFsck h2 = hbck2.get(); // Make sure only one of the calls was successful - assert(h1 == null || h2 == null); + assert (h1 == null || h2 == null); if (h1 != null) { - assert(h1.getRetCode() >= 0); + assert (h1.getRetCode() >= 0); } if (h2 != null) { - assert(h2.getRetCode() >= 0); + assert (h2.getRetCode() >= 0); } } /** - * This test makes sure that with enough retries both parallel instances - * of hbck will be completed successfully. - * + * This test makes sure that with enough retries both parallel instances of hbck will be completed + * successfully. 
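* Each instance keeps retrying the hbck lock file create with exponential backoff (see the
* sleep/retry constants below), so the loser of the race eventually succeeds once the winner
* releases the lock.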
* @throws Exception */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testParallelWithRetriesHbck() throws Exception { final ExecutorService service; - final Future<HBaseFsck> hbck1,hbck2; + final Future<HBaseFsck> hbck1, hbck2; // With the ExponentialBackoffPolicyWithLimit (starting with 200 milliseconds sleep time, and // max sleep time of 5 seconds), we can retry around 15 times within 80 seconds before bail out. // // Note: the reason to use 80 seconds is that in HADOOP-2.6 and later, the create file would - // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). See HBASE-13574 for more + // retry up to HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds). See HBASE-13574 for more // details. final int timeoutInSeconds = 80; final int sleepIntervalInMilliseconds = 200; final int maxSleepTimeInMilliseconds = 6000; final int maxRetryAttempts = 15; - class RunHbck implements Callable<HBaseFsck>{ + class RunHbck implements Callable<HBaseFsck> { @Override public HBaseFsck call() throws Exception { @@ -552,19 +545,19 @@ hbck1 = service.submit(new RunHbck()); hbck2 = service.submit(new RunHbck()); service.shutdown(); - //wait for some time, for both hbck calls finish + // wait for some time, for both hbck calls finish service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS); HBaseFsck h1 = hbck1.get(); HBaseFsck h2 = hbck2.get(); // Both should be successful assertNotNull(h1); assertNotNull(h2); - assert(h1.getRetCode() >= 0); - assert(h2.getRetCode() >= 0); + assert (h1.getRetCode() >= 0); + assert (h2.getRetCode() >= 0); } - @Test (timeout = 180000) + @Test(timeout = 180000) public void testRegionBoundariesCheck() throws Exception { HBaseFsck hbck = doFsck(conf, false); assertNoErrors(hbck); // no errors @@ -580,7 +573,27 @@ } } - @Test (timeout=180000) + @Test(timeout = 180000) + public void testRegionBoundariesCheckWithFlushTable() throws Exception { + HBaseFsck hbck = doFsck(conf, false); + assertNoErrors(hbck); // no errors + TableName table = TableName.valueOf("testRegionBoundariesCheckWithTestFlushTable"); + try { + setupTable(table); + admin.flush(table); + hbck.connect(); // need connection to have access to META + hbck.checkRegionBoundaries(); + assertNoErrors(hbck); // no errors + } catch (IllegalArgumentException e) { + if (e.getMessage().endsWith("not a valid DFS filename.")) { + fail("Table directory path is not valid." + e.getMessage()); + } + } finally { + hbck.close(); + } + } + + @Test(timeout = 180000) public void testHbckAfterRegionMerge() throws Exception { TableName table = TableName.valueOf("testMergeRegionFilesInHdfs"); Table meta = null; @@ -590,7 +603,7 @@ setupTable(table); assertEquals(ROWKEYS.length, countRows()); - try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { + try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) { // make sure data in regions, if in wal only there is no data loss admin.flush(table); HRegionInfo region1 = rl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo(); @@ -609,8 +622,8 @@ if (rl.getAllRegionLocations().size() < regionCountBeforeMerge) { break; } else if (System.currentTimeMillis() > timeout) { - fail("Time out waiting on region " + region1.getEncodedName() + " and " + region2 - .getEncodedName() + " be merged"); + fail("Time out waiting on region " + region1.getEncodedName() + " and " + + region2.getEncodedName() + " to be merged"); } Thread.sleep(10); } @@ -627,11 +640,11 @@ IOUtils.closeQuietly(meta); } } + /** - * This creates entries in hbase:meta with no hdfs data. This should cleanly - * remove the table. 
+ * This creates entries in hbase:meta with no hdfs data. This should cleanly remove the table. */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testNoHdfsTable() throws Exception { TableName table = TableName.valueOf("NoHdfsTable"); setupTable(table); @@ -641,22 +654,25 @@ admin.flush(table); // Mess it up by deleting hdfs dirs - deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), - Bytes.toBytes("A"), false, false, true); // don't rm meta - deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), - Bytes.toBytes("B"), false, false, true); // don't rm meta - deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), - Bytes.toBytes("C"), false, false, true); // don't rm meta - deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), - Bytes.toBytes(""), false, false, true); // don't rm meta + deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), false, + false, true); // don't rm meta + deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), false, + false, true); // don't rm meta + deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, + false, true); // don't rm meta + deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), false, + false, true); // don't rm meta // also remove the table directory in hdfs deleteTableDir(table); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, - HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, - HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE, }); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, + HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_TABLE_STATE, }); // holes are separate from overlap groups assertEquals(0, hbck.getOverlapGroups(table).size()); @@ -664,14 +680,14 @@ doFsck(conf, true); // detect dangling regions and remove those // check that hole fixed - assertNoErrors(doFsck(conf,false)); + assertNoErrors(doFsck(conf, false)); assertFalse("Table " + table + " should have been deleted", admin.tableExists(table)); } /** * When the hbase.version file is missing, hbck repairs the fault. 
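* (This is the repair exposed as -fixVersionFile in the usage text: hbck recreates the missing
* hbase.version file under the root dir.)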
*/ - @Test (timeout=180000) + @Test(timeout = 180000) public void testNoVersionFile() throws Exception { // delete the hbase.version file Path rootDir = FSUtils.getRootDir(conf); @@ -681,7 +697,8 @@ // test HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NO_VERSION_FILE }); // fix hbase.version missing doFsck(conf, true); @@ -689,11 +706,10 @@ assertNoErrors(doFsck(conf, false)); } - @Test (timeout=180000) + @Test(timeout = 180000) public void testNoTableState() throws Exception { // delete the table state entry in hbase:meta - TableName table = - TableName.valueOf("testNoTableState"); + TableName table = TableName.valueOf("testNoTableState"); try { setupTable(table); // make sure data in regions, if in wal only there is no data loss @@ -703,7 +719,8 @@ // test HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NO_TABLE_STATE }); // fix table state missing doFsck(conf, true); @@ -717,36 +734,37 @@ /** * This creates two tables, messes up both of them, and fixes them one by one */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testFixByTable() throws Exception { - TableName table1 = - TableName.valueOf("testFixByTable1"); - TableName table2 = - TableName.valueOf("testFixByTable2"); + TableName table1 = TableName.valueOf("testFixByTable1"); + TableName table2 = TableName.valueOf("testFixByTable2"); try { setupTable(table1); // make sure data in regions, if in wal only there is no data loss admin.flush(table1); // Mess them up by leaving a hole in the hdfs data - deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), - Bytes.toBytes("C"), false, false, true); // don't rm meta + deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, + false, true); // don't rm meta setupTable(table2); // make sure data in regions, if in wal only there is no data loss admin.flush(table2); // Mess them up by leaving a hole in the hdfs data deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), false, - false, true); // don't rm meta + false, true); // don't rm meta HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); // fix hole in table 1 doFsck(conf, true, table1); // check that hole in table 1 fixed assertNoErrors(doFsck(conf, false, table1)); // check that hole in table 2 still there - assertErrors(doFsck(conf, false, table2), new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); + assertErrors(doFsck(conf, false, table2), new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); // fix hole in table 2 doFsck(conf, true, table2); @@ -758,13 +776,13 @@ cleanupTable(table2); } } + /** * A split parent in meta, in hdfs, and not deployed */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testLingeringSplitParent() throws Exception 
{ - TableName table = - TableName.valueOf("testLingeringSplitParent"); + TableName table = TableName.valueOf("testLingeringSplitParent"); Table meta = null; try { setupTable(table); @@ -774,22 +792,20 @@ admin.flush(table); HRegionLocation location; - try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { + try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) { location = rl.getRegionLocation(Bytes.toBytes("B")); } // Delete one region from meta, but not hdfs, unassign it. - deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), - Bytes.toBytes("C"), true, true, false); + deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true, + true, false); // Create a new meta entry to fake it as a split parent. meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService); HRegionInfo hri = location.getRegionInfo(); - HRegionInfo a = new HRegionInfo(tbl.getName(), - Bytes.toBytes("B"), Bytes.toBytes("BM")); - HRegionInfo b = new HRegionInfo(tbl.getName(), - Bytes.toBytes("BM"), Bytes.toBytes("C")); + HRegionInfo a = new HRegionInfo(tbl.getName(), Bytes.toBytes("B"), Bytes.toBytes("BM")); + HRegionInfo b = new HRegionInfo(tbl.getName(), Bytes.toBytes("BM"), Bytes.toBytes("C")); hri.setOffline(true); hri.setSplit(true); @@ -799,17 +815,23 @@ admin.flush(TableName.META_TABLE_NAME); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { - HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN}); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); // regular repair cannot fix lingering split parent hbck = doFsck(conf, true); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { - HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); assertFalse(hbck.shouldRerun()); hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { - HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN}); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_SPLIT_PARENT, + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); // fix lingering split parent hbck = new HBaseFsck(conf, hbfsckExecutorService); @@ -823,10 +845,10 @@ Get get = new Get(hri.getRegionName()); Result result = meta.get(get); - assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, - HConstants.SPLITA_QUALIFIER).isEmpty()); - assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY, - HConstants.SPLITB_QUALIFIER).isEmpty()); + assertTrue( + result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER).isEmpty()); + assertTrue( + result.getColumnCells(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER).isEmpty()); admin.flush(TableName.META_TABLE_NAME); // fix other issues @@ -842,13 +864,12 @@ } /** - * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for - * valid cases where the daughters are there. 
+ * Tests that LINGERING_SPLIT_PARENT is not erroneously reported for valid cases where the + * daughters are there. */ - @Test (timeout=180000) + @Test(timeout = 180000) public void testValidLingeringSplitParent() throws Exception { - TableName table = - TableName.valueOf("testLingeringSplitParent"); + TableName table = TableName.valueOf("testLingeringSplitParent"); Table meta = null; try { setupTable(table); @@ -857,7 +878,7 @@ // make sure data in regions, if in wal only there is no data loss admin.flush(table); - try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { + try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) { HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B")); meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService); @@ -872,8 +893,10 @@ // for some time until children references are deleted. HBCK erroneously sees this as // overlapping regions HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, - false, false, null); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported + false, false, null); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {}); // no + // LINGERING_SPLIT_PARENT + // reported // assert that the split hbase:meta entry is still there. Get get = new Get(hri.getRegionName()); @@ -884,7 +907,7 @@ assertEquals(ROWKEYS.length, countRows()); // assert that we still have the split regions - assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions + assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); // SPLITS + 1 is # regions // pre-split. assertNoErrors(doFsck(conf, false)); } @@ -895,10 +918,10 @@ } /** - * Split crashed after write to hbase:meta finished for the parent region, but - * failed to write daughters (pre HBASE-7721 codebase) + * Split crashed after write to hbase:meta finished for the parent region, but failed to write + * daughters (pre HBASE-7721 codebase) */ - @Test(timeout=75000) + @Test(timeout = 75000) public void testSplitDaughtersNotInMeta() throws Exception { TableName table = TableName.valueOf("testSplitdaughtersNotInMeta"); Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService); @@ -909,7 +932,7 @@ // make sure data in regions, if in wal only there is no data loss admin.flush(table); - try(RegionLocator rl = connection.getRegionLocator(tbl.getName())) { + try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) { HRegionLocation location = rl.getRegionLocation(Bytes.toBytes("B")); HRegionInfo hri = location.getRegionInfo(); @@ -919,8 +942,8 @@ admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM")); TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true); - PairOfSameType<HRegionInfo> daughters = MetaTableAccessor.getDaughterRegions( - meta.get(new Get(regionName))); + PairOfSameType<HRegionInfo> daughters = + MetaTableAccessor.getDaughterRegions(meta.get(new Get(regionName))); // Delete daughter regions from meta, but not hdfs, unassign it. @@ -938,24 +961,27 @@ meta.delete(deletes); // Remove daughters from regionStates - RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster(). 
- getAssignmentManager().getRegionStates(); + RegionStates regionStates = + TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates(); regionStates.deleteRegion(daughters.getFirst()); regionStates.deleteRegion(daughters.getSecond()); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { - HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); // no + // LINGERING_SPLIT_PARENT // now fix it. The fix should not revert the region split, but add daughters to META - hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, - false, false, null); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { - HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); + hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, + false, null); + assertErrors(hbck, + new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN }); // assert that the split hbase:meta entry is still there. Get get = new Get(hri.getRegionName()); @@ -966,9 +992,9 @@ assertEquals(ROWKEYS.length, countRows()); // assert that we still have the split regions - assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); //SPLITS + 1 is # regions + assertEquals(rl.getStartKeys().length, SPLITS.length + 1 + 1); // SPLITS + 1 is # regions // pre-split. - assertNoErrors(doFsck(conf, false)); //should be fixed by now + assertNoErrors(doFsck(conf, false)); // should be fixed by now } } finally { meta.close(); @@ -980,7 +1006,7 @@ * This creates and fixes a bad table with a missing region which is the 1st region -- hole in * meta and data missing in the fs. */ - @Test(timeout=120000) + @Test(timeout = 120000) public void testMissingFirstRegion() throws Exception { TableName table = TableName.valueOf("testMissingFirstRegion"); try { @@ -990,11 +1016,12 @@ // Mess it up by leaving a hole in the assignment, meta, and hdfs data admin.disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true, - true, true); + true, true); admin.enableTable(table); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY }); // fix hole doFsck(conf, true); // check that hole fixed @@ -1008,21 +1035,20 @@ * This creates and fixes a bad table with a region that is deployed and present in hbase:meta * but whose data is missing in the fs. 
*/ - @Test(timeout=120000) + @Test(timeout = 120000) public void testRegionDeployedNotInHdfs() throws Exception { - TableName table = - TableName.valueOf("testSingleRegionDeployedNotInHdfs"); + TableName table = TableName.valueOf("testSingleRegionDeployedNotInHdfs"); try { setupTable(table); admin.flush(table); // Mess it up by deleting region dir - deleteRegion(conf, tbl.getTableDescriptor(), - HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false, - false, true); + deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), + false, false, true); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS }); // fix hole doFsck(conf, true); // check that hole fixed @@ -1036,10 +1062,9 @@ * This creates and fixes a bad table with missing last region -- hole in meta and data missing in * the fs. */ - @Test(timeout=120000) + @Test(timeout = 120000) public void testMissingLastRegion() throws Exception { - TableName table = - TableName.valueOf("testMissingLastRegion"); + TableName table = TableName.valueOf("testMissingLastRegion"); try { setupTable(table); assertEquals(ROWKEYS.length, countRows()); @@ -1047,11 +1072,12 @@ // Mess it up by leaving a hole in the assignment, meta, and hdfs data admin.disableTable(table); deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true, - true, true); + true, true); admin.enableTable(table); HBaseFsck hbck = doFsck(conf, false); - assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY }); + assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { + HBaseFsck.ErrorReporter.ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY }); // fix hole doFsck(conf, true); // check that hole fixed @@ -1064,22 +1090,22 @@ /** * Test -noHdfsChecking option can detect and fix assignments issue. 
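* (-noHdfsChecking corresponds to setCheckHdfs(false) on an HBaseFsck instance, as the body
* below shows.)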
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
-    TableName table =
-        TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
+    TableName table = TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
     try {
       setupTable(table);
       assertEquals(ROWKEYS.length, countRows());
       // Mess it up by closing a region
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
-          false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
+        false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
       // verify there is no other errors
       HBaseFsck hbck = doFsck(conf, false);
       assertErrors(hbck,
-        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // verify that noHdfsChecking report the same errors
       HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
@@ -1089,7 +1115,8 @@
       fsck.setCheckHdfs(false);
       fsck.onlineHbck();
       assertErrors(fsck,
-        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       fsck.close();
       // verify that fixAssignments works fine with noHdfsChecking
@@ -1113,26 +1140,26 @@
   }
   /**
-   * Test -noHdfsChecking option can detect region is not in meta but deployed.
-   * However, it can not fix it without checking Hdfs because we need to get
-   * the region info from Hdfs in this case, then to patch the meta.
+   * Test -noHdfsChecking option can detect region is not in meta but deployed. However, it can not
+   * fix it without checking Hdfs because we need to get the region info from Hdfs in this case,
+   * then to patch the meta.
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
-    TableName table =
-        TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
+    TableName table = TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
     try {
       setupTable(table);
       assertEquals(ROWKEYS.length, countRows());
       // Mess it up by deleting a region from the metadata
-      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
-        Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), false,
+        true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
       // verify there is no other errors
       HBaseFsck hbck = doFsck(conf, false);
       assertErrors(hbck,
-        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // verify that noHdfsChecking report the same errors
       HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
@@ -1142,7 +1169,8 @@
       fsck.setCheckHdfs(false);
       fsck.onlineHbck();
       assertErrors(fsck,
-        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       fsck.close();
       // verify that fixMeta doesn't work with noHdfsChecking
@@ -1156,7 +1184,8 @@
       fsck.onlineHbck();
       assertFalse(fsck.shouldRerun());
       assertErrors(fsck,
-        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META, HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       fsck.close();
       // fix the cluster so other tests won't be impacted
@@ -1170,13 +1199,12 @@
   }
   /**
-   * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
-   * and -noHdfsChecking can't detect orphan Hdfs region.
+   * Test -fixHdfsHoles doesn't work with -noHdfsChecking option, and -noHdfsChecking can't detect
+   * orphan Hdfs region.
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
-    TableName table =
-        TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
+    TableName table = TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
     try {
       setupTable(table);
       assertEquals(ROWKEYS.length, countRows());
@@ -1184,21 +1212,22 @@
       // Mess it up by creating an overlap in the metadata
       admin.disableTable(table);
       deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"), true,
-          true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
+        true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
       admin.enableTable(table);
       HRegionInfo hriOverlap =
           createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-        .waitForAssignment(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriOverlap);
       ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
       TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
       HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN});
+      assertErrors(hbck,
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
       HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
@@ -1207,7 +1236,8 @@
       fsck.setTimeLag(0);
       fsck.setCheckHdfs(false);
       fsck.onlineHbck();
-      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+        HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       fsck.close();
       // verify that fixHdfsHoles doesn't work with noHdfsChecking
@@ -1221,7 +1251,8 @@
       fsck.setFixHdfsOrphans(true);
       fsck.onlineHbck();
       assertFalse(fsck.shouldRerun());
-      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+      assertErrors(fsck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+        HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       fsck.close();
     } finally {
       if (admin.isTableDisabled(table)) {
@@ -1232,9 +1263,9 @@
   }
   /**
-   * This creates a table and then corrupts an hfile.  Hbck should quarantine the file.
+   * This creates a table and then corrupts an hfile. Hbck should quarantine the file.
    */
-  @Test(timeout=180000)
+  @Test(timeout = 180000)
   public void testQuarantineCorruptHFile() throws Exception {
     TableName table = TableName.valueOf(name.getMethodName());
     try {
@@ -1275,7 +1306,7 @@
    * This creates a table and simulates the race situation where a concurrent compaction or split
    * has removed an hfile after the corruption checker learned about it.
    */
-  @Test(timeout=180000)
+  @Test(timeout = 180000)
   public void testQuarantineMissingHFile() throws Exception {
     TableName table = TableName.valueOf(name.getMethodName());
@@ -1287,6 +1318,7 @@
           throws IOException {
         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+
           @Override
           protected void checkHFile(Path p) throws IOException {
             if (attemptedFirstHFile.compareAndSet(false, true)) {
@@ -1304,7 +1336,7 @@
   /**
    * This creates and fixes a bad table with regions that has startkey == endkey
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testDegenerateRegions() throws Exception {
     TableName table = TableName.valueOf("tableDegenerateRegions");
     try {
@@ -1316,14 +1348,16 @@
       HRegionInfo hriDupe =
           createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
       TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
-      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
-        .waitForAssignment(hriDupe);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(hriDupe);
       ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
       TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
-      HBaseFsck hbck = doFsck(conf,false);
-      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION, HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
-          HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck,
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.DEGENERATE_REGION,
+            HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
+            HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS });
       assertEquals(2, hbck.getOverlapGroups(table).size());
       assertEquals(ROWKEYS.length, countRows());
@@ -1331,7 +1365,7 @@
       doFsck(conf, true);
       // check that the degenerate region is gone and no data loss
-      HBaseFsck hbck2 = doFsck(conf,false);
+      HBaseFsck hbck2 = doFsck(conf, false);
       assertNoErrors(hbck2);
       assertEquals(0, hbck2.getOverlapGroups(table).size());
       assertEquals(ROWKEYS.length, countRows());
@@ -1343,7 +1377,7 @@
   /**
    * Test mission REGIONINFO_QUALIFIER in hbase:meta
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testMissingRegionInfoQualifier() throws Exception {
     Connection connection = ConnectionFactory.createConnection(conf);
     TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
@@ -1369,22 +1403,22 @@
       meta.delete(deletes);
       // Mess it up by creating a fake hbase:meta entry with no associated RegionInfo
-      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66"))
-        .addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
-          Bytes.toBytes("node1:60020")));
-      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66"))
-        .addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
-          Bytes.toBytes(1362150791183L)));
+      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).addColumn(
+        HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
+      meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).addColumn(
+        HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
       meta.close();
       HBaseFsck hbck = doFsck(conf, false);
-      assertTrue(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
+      assertTrue(hbck.getErrors().getErrorList()
+          .contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
       // fix reference file
       hbck = doFsck(conf, true);
       // check that reference file fixed
-      assertFalse(hbck.getErrors().getErrorList().contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
+      assertFalse(hbck.getErrors().getErrorList()
+          .contains(HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL));
     } finally {
       cleanupTable(table);
     }
@@ -1392,10 +1426,9 @@
   }
   /**
-   * Test pluggable error reporter. It can be plugged in
-   * from system property or configuration.
+   * Test pluggable error reporter. It can be plugged in from system property or configuration.
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testErrorReporter() throws Exception {
     try {
       MockErrorReporter.calledCount = 0;
@@ -1406,13 +1439,12 @@
       doFsck(conf, false);
       assertTrue(MockErrorReporter.calledCount > 20);
     } finally {
-      conf.set("hbasefsck.errorreporter",
-        HBaseFsck.PrintingErrorReporter.class.getName());
+      conf.set("hbasefsck.errorreporter", HBaseFsck.PrintingErrorReporter.class.getName());
       MockErrorReporter.calledCount = 0;
     }
   }
-  @Test(timeout=60000)
+  @Test(timeout = 60000)
   public void testCheckTableLocks() throws Exception {
     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
     EnvironmentEdgeManager.injectEdge(edge);
@@ -1426,24 +1458,25 @@
     // obtain one lock
     final TableLockManager tableLockManager =
         TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
-    TableLockManager.TableLock
-      writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
+    TableLockManager.TableLock writeLock =
+        tableLockManager.writeLock(tableName, "testCheckTableLocks");
     writeLock.acquire();
     hbck = doFsck(conf, false);
     assertNoErrors(hbck); // should not have expired, no problems
     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
-        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
+      TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
     hbck = doFsck(conf, false);
-    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK});
+    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+      HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK });
     final CountDownLatch latch = new CountDownLatch(1);
     new Thread() {
       @Override
       public void run() {
-        TableLockManager.TableLock
-          readLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
+        TableLockManager.TableLock readLock =
+            tableLockManager.writeLock(tableName, "testCheckTableLocks");
        try {
          latch.countDown();
          readLock.acquire();
@@ -1461,14 +1494,17 @@
     hbck = doFsck(conf, false);
     assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // still one expired, one not-expired
+      HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK }); // still one expired, one
+                                                                // not-expired
     edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
-        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
+      TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS)); // let table lock expire
     hbck = doFsck(conf, false);
-    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
-        HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK}); // both are expired
+    assertErrors(hbck,
+      new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK,
+          HBaseFsck.ErrorReporter.ERROR_CODE.EXPIRED_TABLE_LOCK }); // both are expired
     conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
     // reaping from ZKInterProcessWriteLock uses znode cTime,
@@ -1487,25 +1523,24 @@
     tableLockManager.tableDeleted(tableName);
   }
-  @Test(timeout=180000)
+  @Test(timeout = 180000)
   public void testCheckReplication() throws Exception {
     // check no errors
     HBaseFsck hbck = doFsck(conf, false);
     assertNoErrors(hbck);
-    
+
     // create peer
     ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf);
     Assert.assertEquals(0, replicationAdmin.getPeersCount());
-    int zkPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT,
-        HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT);
+    int zkPort =
+        conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT);
     replicationAdmin.addPeer("1", "127.0.0.1:" + zkPort + ":/hbase");
     replicationAdmin.getPeersCount();
     Assert.assertEquals(1, replicationAdmin.getPeersCount());
-    
+
     // create replicator
     ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "Test Hbase Fsck", connection);
-    ReplicationQueues repQueues =
-        ReplicationFactory.getReplicationQueues(zkw, conf, connection);
+    ReplicationQueues repQueues = ReplicationFactory.getReplicationQueues(zkw, conf, connection);
     repQueues.init("server1");
     // queues for current peer, no errors
     repQueues.addLog("1", "file1");
@@ -1513,16 +1548,17 @@
     Assert.assertEquals(2, repQueues.getAllQueues().size());
     hbck = doFsck(conf, false);
     assertNoErrors(hbck);
-    
+
     // queues for removed peer
     repQueues.addLog("2", "file1");
    repQueues.addLog("2-server2", "file1");
     Assert.assertEquals(4, repQueues.getAllQueues().size());
     hbck = doFsck(conf, false);
-    assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-        HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
-        HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE });
-    
+    assertErrors(hbck,
+      new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+          HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
+          HBaseFsck.ErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE });
+
     // fix the case
     hbck = doFsck(conf, true);
     hbck = doFsck(conf, false);
@@ -1531,7 +1567,7 @@
     Assert.assertEquals(2, repQueues.getAllQueues().size());
     Assert.assertNull(repQueues.getLogsInQueue("2"));
     Assert.assertNull(repQueues.getLogsInQueue("2-sever2"));
-    
+
     replicationAdmin.removePeer("1");
     repQueues.removeAllQueues();
     zkw.close();
@@ -1539,10 +1575,10 @@
   }
   /**
-   * This creates and fixes a bad table with a missing region -- hole in meta
-   * and data present but .regioinfino missing (an orphan hdfs region)in the fs.
+   * This creates and fixes a bad table with a missing region -- hole in meta and data present but
+   * .regioinfino missing (an orphan hdfs region)in the fs.
    */
-  @Test(timeout=180000)
+  @Test(timeout = 180000)
   public void testHDFSRegioninfoMissing() throws Exception {
     TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
     try {
@@ -1558,9 +1594,9 @@
       HBaseFsck hbck = doFsck(conf, false);
       assertErrors(hbck,
         new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+            HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // holes are separate from overlap groups
       assertEquals(0, hbck.getOverlapGroups(table).size());
@@ -1576,13 +1612,12 @@
   }
   /**
-   * This creates and fixes a bad table with a region that is missing meta and
-   * not assigned to a region server.
+   * This creates and fixes a bad table with a region that is missing meta and not assigned to a
+   * region server.
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testNotInMetaOrDeployedHole() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInMetaOrDeployedHole");
+    TableName table = TableName.valueOf("tableNotInMetaOrDeployedHole");
     try {
       setupTable(table);
       assertEquals(ROWKEYS.length, countRows());
@@ -1596,16 +1631,16 @@
       HBaseFsck hbck = doFsck(conf, false);
       assertErrors(hbck,
         new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // holes are separate from overlap groups
       assertEquals(0, hbck.getOverlapGroups(table).size());
       // fix hole
       assertErrors(doFsck(conf, true),
         new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // check that hole fixed
       assertNoErrors(doFsck(conf, false));
@@ -1615,7 +1650,7 @@
     }
   }
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
     TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
@@ -1635,9 +1670,9 @@
       int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
       HRegionServer regionServer = cluster.getRegionServer(serverWith);
       cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
-      SplitTransactionImpl st = (SplitTransactionImpl)
-        new SplitTransactionFactory(TEST_UTIL.getConfiguration())
-          .create(regions.get(0), Bytes.toBytes("r3"));
+      SplitTransactionImpl st =
+          (SplitTransactionImpl) new SplitTransactionFactory(TEST_UTIL.getConfiguration())
+              .create(regions.get(0), Bytes.toBytes("r3"));
       st.prepare();
       st.stepsBeforePONR(regionServer, regionServer, false);
       AssignmentManager am = cluster.getMaster().getAssignmentManager();
@@ -1650,9 +1685,10 @@
       am.assign(regionsMap);
       am.waitForAssignment(regions.get(0).getRegionInfo());
       HBaseFsck hbck = doFsck(conf, false);
-      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+      assertErrors(hbck,
+        new HBaseFsck.ErrorReporter.ERROR_CODE[] {
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
       // holes are separate from overlap groups
       assertEquals(0, hbck.getOverlapGroups(table).size());
@@ -1661,8 +1697,8 @@
         doFsck(conf, false, true, false, false, false, false, false, false, false, false, false,
           null),
         new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
       // check that hole fixed
       assertNoErrors(doFsck(conf, false));
@@ -1679,10 +1715,9 @@
   /**
    * This creates fixes a bad table with a hole in meta.
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testNotInMetaHole() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInMetaHole");
+    TableName table = TableName.valueOf("tableNotInMetaHole");
     try {
       setupTable(table);
       assertEquals(ROWKEYS.length, countRows());
@@ -1696,16 +1731,16 @@
       HBaseFsck hbck = doFsck(conf, false);
       assertErrors(hbck,
         new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // holes are separate from overlap groups
       assertEquals(0, hbck.getOverlapGroups(table).size());
       // fix hole
       assertErrors(doFsck(conf, true),
         new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
-          HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
+            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
       // check that hole fixed
       assertNoErrors(doFsck(conf, false));
@@ -1716,13 +1751,12 @@
   }
   /**
-   * This creates and fixes a bad table with a region that is in meta but has
-   * no deployment or data hdfs
+   * This creates and fixes a bad table with a region that is in meta but has no deployment or data
+   * hdfs
    */
-  @Test (timeout=180000)
+  @Test(timeout = 180000)
   public void testNotInHdfs() throws Exception {
-    TableName table =
-        TableName.valueOf("tableNotInHdfs");
+    TableName table = TableName.valueOf("tableNotInHdfs");
     try {
       setupTable(table);
       assertEquals(ROWKEYS.length, countRows());
@@ -1736,7 +1770,7 @@
       HBaseFsck hbck = doFsck(conf, false);
       assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
-          HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS});
+        HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_HDFS });
       // holes are separate from overlap groups
       assertEquals(0, hbck.getOverlapGroups(table).size());
       // fix hole
       doFsck(conf, true);
@@ -1744,7 +1778,7 @@
       // check that hole fixed
-      assertNoErrors(doFsck(conf,false));
+      assertNoErrors(doFsck(conf, false));
       assertEquals(ROWKEYS.length - 2, countRows());
     } finally {
       cleanupTable(table);
     }
@@ -1755,10 +1789,10 @@
    * This creates a table and simulates the race situation where a concurrent compaction or split
    * has removed an colfam dir before the corruption checker got to it.
    */
-  // Disabled because fails sporadically.  Is this test right?  Timing-wise, there could be no
+  // Disabled because fails sporadically. Is this test right? Timing-wise, there could be no
   // files in a column family on initial creation -- as suggested by Matteo.
   @Ignore
-  @Test(timeout=180000)
+  @Test(timeout = 180000)
   public void testQuarantineMissingFamdir() throws Exception {
     TableName table = TableName.valueOf(name.getMethodName());
     // inject a fault in the hfcc created.
@@ -1766,9 +1800,10 @@
     HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
       @Override
       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
-        throws IOException {
+          throws IOException {
         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+
           @Override
           protected void checkColFamDir(Path p) throws IOException {
             if (attemptedFirstHFile.compareAndSet(false, true)) {
@@ -1787,7 +1822,7 @@
    * This creates a table and simulates the race situation where a concurrent compaction or split
    * has removed a region dir before the corruption checker got to it.
    */
-  @Test(timeout=180000)
+  @Test(timeout = 180000)
   public void testQuarantineMissingRegionDir() throws Exception {
     TableName table = TableName.valueOf(name.getMethodName());
     // inject a fault in the hfcc created.
@@ -1795,9 +1830,10 @@
     HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
       @Override
       public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
-        throws IOException {
+          throws IOException {
         return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
           AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
+
           @Override
           protected void checkRegionDir(Path p) throws IOException {
             if (attemptedFirstHFile.compareAndSet(false, true)) {