diff --git a/src/main/java/org/apache/hadoop/hbase/HConstants.java b/src/main/java/org/apache/hadoop/hbase/HConstants.java index 485006b..92a0783 100644 --- a/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -327,6 +327,12 @@ public final class HConstants { /** The upper-half split region column qualifier */ public static final byte [] SPLITB_QUALIFIER = Bytes.toBytes("splitB"); + /** The lower-half merge region column qualifier */ + public static final byte[] MERGEA_QUALIFIER = Bytes.toBytes("mergeA"); + + /** The upper-half merge region column qualifier */ + public static final byte[] MERGEB_QUALIFIER = Bytes.toBytes("mergeB"); + /** * The meta table version column qualifier. * We keep current version of the meta table in this column in -ROOT- diff --git a/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java b/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java index 9420505..2c52a07 100644 --- a/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java +++ b/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java @@ -61,11 +61,11 @@ implements WritableComparable { * where, * <encodedName> is a hex version of the MD5 hash of * <tablename>,<startkey>,<regionIdTimestamp> - * + * * The old region name format: * <tablename>,<startkey>,<regionIdTimestamp> * For region names in the old format, the encoded name is a 32-bit - * JenkinsHash integer value (in its decimal notation, string form). + * JenkinsHash integer value (in its decimal notation, string form). *

* **NOTE** * @@ -75,8 +75,8 @@ implements WritableComparable { */ /** Separator used to demarcate the encodedName in a region name - * in the new format. See description on new format above. - */ + * in the new format. See description on new format above. + */ private static final int ENC_SEPARATOR = '.'; public static final int MD5_HEX_LENGTH = 32; @@ -94,11 +94,11 @@ implements WritableComparable { if ((regionName.length >= 1) && (regionName[regionName.length - 1] == ENC_SEPARATOR)) { // region name is new format. it contains the encoded name. - return true; + return true; } return false; } - + /** * @param regionName * @return the encodedName @@ -112,7 +112,7 @@ implements WritableComparable { regionName.length - MD5_HEX_LENGTH - 1, MD5_HEX_LENGTH); } else { - // old format region name. ROOT and first META region also + // old format region name. ROOT and first META region also // use this format.EncodedName is the JenkinsHash value. int hashVal = Math.abs(JenkinsHash.getInstance().hash(regionName, regionName.length, 0)); @@ -373,7 +373,7 @@ implements WritableComparable { if (md5HashBytes.length != MD5_HEX_LENGTH) { LOG.error("MD5-hash length mismatch: Expected=" + MD5_HEX_LENGTH + - "; Got=" + md5HashBytes.length); + "; Got=" + md5HashBytes.length); } // now append the bytes '..' to the end @@ -382,7 +382,7 @@ implements WritableComparable { offset += MD5_HEX_LENGTH; b[offset++] = ENC_SEPARATOR; } - + return b; } @@ -493,7 +493,7 @@ implements WritableComparable { public byte [] getStartKey(){ return startKey; } - + /** @return the endKey */ public byte [] getEndKey(){ return endKey; @@ -538,7 +538,7 @@ implements WritableComparable { Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY); return firstKeyInRange && lastKeyInRange; } - + /** * Return true if the given row falls in this region. 
*/ @@ -804,7 +804,7 @@ implements WritableComparable { if (this.offLine == o.offLine) return 0; if (this.offLine == true) return -1; - + return 1; } @@ -815,4 +815,28 @@ implements WritableComparable { return isRootRegion()? KeyValue.ROOT_COMPARATOR: isMetaRegion()? KeyValue.META_COMPARATOR: KeyValue.COMPARATOR; } + + /** + * Check whether two regions are adjacent + * @param regionA + * @param regionB + * @return true if two regions are adjacent + */ + public static boolean areAdjacent(HRegionInfo regionA, HRegionInfo regionB) { + if (regionA == null || regionB == null) { + throw new IllegalArgumentException( + "Can't check whether adjacent for null region"); + } + HRegionInfo a = regionA; + HRegionInfo b = regionB; + if (Bytes.compareTo(a.getStartKey(), b.getStartKey()) > 0) { + a = regionB; + b = regionA; + } + if (Bytes.compareTo(a.getEndKey(), b.getStartKey()) == 0) { + return true; + } + return false; + } + } diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java b/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java index cca2353..540c289 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InterruptedIOException; import java.net.ConnectException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.commons.logging.Log; @@ -211,6 +212,18 @@ public class MetaEditor { } /** + * Generates and returns a Delete containing the region info for the catalog + * table + */ + public static Delete makeDeleteFromRegionInfo(HRegionInfo regionInfo) { + if (regionInfo == null) { + throw new IllegalArgumentException("Can't make a delete for null region"); + } + Delete delete = new Delete(regionInfo.getRegionName()); + return delete; + } + + /** * Adds split daughters to the Put */ public static Put addDaughtersToPut(Put put, HRegionInfo splitA, HRegionInfo 
splitB) @@ -225,6 +238,40 @@ public class MetaEditor { } /** + * Merge the two regions into one in an atomic operation. Deletes the two + * merging regions in META and adds the merged region with the information of + * two merging regions. + * @param catalogTracker the catalog tracker + * @param mergedRegion the merged region + * @param regionA the first region to merge + * @param regionB the second region to merge + * @param sn the location of the region + * @throws IOException + */ + public static void mergeRegions(final CatalogTracker catalogTracker, + HRegionInfo mergedRegion, HRegionInfo regionA, HRegionInfo regionB, + ServerName sn) throws IOException { + HTable meta = MetaReader.getMetaHTable(catalogTracker); + HRegionInfo copyOfMerged = new HRegionInfo(mergedRegion); + + // Put for merged region + Put putOfMerged = makePutFromRegionInfo(copyOfMerged); + putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER, + Writables.getBytes(regionA)); + putOfMerged.add(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER, + Writables.getBytes(regionB)); + + // Deletes for merging regions + Delete deleteA = makeDeleteFromRegionInfo(regionA); + Delete deleteB = makeDeleteFromRegionInfo(regionB); + + addLocation(putOfMerged, sn); + + byte[] tableRow = mergedRegion.getRegionName(); + multiMutate(meta, tableRow, putOfMerged, deleteA, deleteB); + } + + /** * Splits the region into two in an atomic operation. Offlines the parent * region with the information that it is split into two, and also adds * the daughter regions. Does not add the location information to the daughter @@ -255,20 +302,16 @@ public class MetaEditor { addLocation(putB, sn); byte[] tableRow = parent.getRegionName(); - multiPut(meta, tableRow, putParent, putA, putB); + multiMutate(meta, tableRow, putParent, putA, putB); } /** * Performs an atomic multi-Put operation against the given table. */ - private static void multiPut(HTable table, byte[] row, Put... 
puts) throws IOException { + private static void multiMutate(HTable table, byte[] row, Mutation... mutations) throws IOException { MultiRowMutationProtocol endpoint = table.coprocessorProxy(MultiRowMutationProtocol.class, row); - List mutations = new ArrayList(puts.length); - for (Put put : puts) { - mutations.add(put); - } - endpoint.mutateRows(mutations); + endpoint.mutateRows(Arrays.asList(mutations)); } @@ -434,6 +477,24 @@ public class MetaEditor { return new PairOfSameType(splitA, splitB); } + /** + * Deletes merge qualifiers for the specified merged region. + * @param catalogTracker + * @param mergedRegion + * @throws IOException + */ + public static void deleteMergeQualifiers(CatalogTracker catalogTracker, + final HRegionInfo mergedRegion) throws IOException { + Delete delete = new Delete(mergedRegion.getRegionName()); + delete.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER); + delete.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER); + deleteFromMetaTable(catalogTracker, delete); + LOG.info("Deleted references in merged region " + + mergedRegion.getRegionNameAsString() + ", qualifier=" + + Bytes.toStringBinary(HConstants.MERGEA_QUALIFIER) + " and qualifier=" + + Bytes.toStringBinary(HConstants.MERGEB_QUALIFIER)); + } + private static Put addRegionInfo(final Put p, final HRegionInfo hri) throws IOException { p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java b/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java index cbb6d48..5998ae4 100644 --- a/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java +++ b/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java @@ -204,7 +204,7 @@ public class MetaReader { /** * Callers should call close on the returned {@link HTable} instance. 
* @param catalogTracker - * @param row Row we are putting + * @param row Row we are putting * @return * @throws IOException */ @@ -406,6 +406,38 @@ public class MetaReader { /** + * Gets the result in META for the specified region. + * @param catalogTracker + * @param regionName + * @return result of the specified region + * @throws IOException + */ + public static Result getRegionResult(CatalogTracker catalogTracker, + byte[] regionName) throws IOException { + Get get = new Get(regionName); + get.addFamily(HConstants.CATALOG_FAMILY); + return get(getCatalogHTable(catalogTracker, regionName), get); + } + + /** + * Get regions from the merge qualifier of the specified merged region + * @return null if it doesn't contain merge qualifier, else two merge regions + * @throws IOException + */ + public static Pair getRegionsFromMergeQualifier( + CatalogTracker catalogTracker, byte[] regionName) throws IOException { + Result result = getRegionResult(catalogTracker, regionName); + HRegionInfo mergeA = parseHRegionInfoFromCatalogResult(result, + HConstants.MERGEA_QUALIFIER); + HRegionInfo mergeB = parseHRegionInfoFromCatalogResult(result, + HConstants.MERGEB_QUALIFIER); + if (mergeA == null && mergeB == null) { + return null; + } + return new Pair(mergeA, mergeB); + } + + /** * Checks if the specified table exists. Looks at the META table hosted on * the specified server. 
* @param catalogTracker @@ -639,7 +671,7 @@ public class MetaReader { void add(Result r) { if (r == null || r.isEmpty()) return; ServerName sn = getServerNameFromCatalogResult(r); - if (sn != null && sn.equals(serverName)) this.results.add(r); + if (sn != null && sn.equals(serverName)) this.results.add(r); } }; fullScan(catalogTracker, v); diff --git a/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index 3410b81..e17769d 100644 --- a/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -77,8 +77,6 @@ import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StringUtils; -import com.google.protobuf.ServiceException; - /** * Provides an interface to manage HBase database table metadata + general * administrative functions. Use HBaseAdmin to create, drop, list, enable and @@ -1618,6 +1616,25 @@ public class HBaseAdmin implements Abortable, Closeable { } /** + * Merge two regions. Asynchronous operation. + * @param encodedNameOfRegionA encoded name of region a + * @param encodedNameOfRegionB encoded name of region b + * @param forcible true if do a compulsory merge, otherwise we will only merge + * two adjacent regions + * @throws IOException + */ + public void mergeRegions(final byte[] encodedNameOfRegionA, + final byte[] encodedNameOfRegionB, final boolean forcible) + throws IOException { + try { + getMaster().dispatchMergingRegions(encodedNameOfRegionA, encodedNameOfRegionB, forcible); + } catch (RemoteException re) { + throw RemoteExceptionHandler.decodeRemoteException(re); + } + } + + + /** * Modify an existing table, more IRB friendly version. * Asynchronous operation. This means that it may be a while before your * schema change is updated across all of the table. 
diff --git a/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java b/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java index 1be8a0f..c205959 100644 --- a/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java @@ -95,7 +95,7 @@ public abstract class EventHandler implements Runnable, Comparable { * originated and then where its destined -- e.g. RS2ZK_ prefix means the * event came from a regionserver destined for zookeeper -- and then what * the even is; e.g. REGION_OPENING. - * + * *

We give the enums indices so we can add types later and keep them * grouped together rather than have to add them always to the end as we * would have to if we used raw enum ordinals. @@ -110,6 +110,8 @@ public abstract class EventHandler implements Runnable, Comparable { RS_ZK_REGION_SPLITTING (5), // RS has started a region split RS_ZK_REGION_SPLIT (6), // RS split has completed. RS_ZK_REGION_FAILED_OPEN (7), // RS failed to open a region + RS_ZK_REGION_MERGING (8), // RS has started merging regions + RS_ZK_REGION_MERGE (9), // RS region merge has completed. // Messages originating from Master to RS M_RS_OPEN_REGION (20), // Master asking RS to open a region @@ -120,6 +122,7 @@ public abstract class EventHandler implements Runnable, Comparable { M_RS_CLOSE_META (25), // Master asking RS to close meta // Messages originating from Client to Master + C_M_MERGE_REGION (30), // Client asking Master to merge regions C_M_DELETE_TABLE (40), // Client asking Master to delete a table C_M_DISABLE_TABLE (41), // Client asking Master to disable a table C_M_ENABLE_TABLE (42), // Client asking Master to enable a table @@ -241,7 +244,7 @@ public abstract class EventHandler implements Runnable, Comparable { public synchronized void setListener(EventHandlerListener listener) { this.listener = listener; } - + @Override public String toString() { return "Event #" + getSeqid() + diff --git a/src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java b/src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java index a34dd69..5ddce55 100644 --- a/src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java +++ b/src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.executor; import java.io.IOException; -import java.io.PrintWriter; import java.io.Writer; import java.lang.management.ThreadInfo; import java.util.List; @@ -30,8 +29,6 @@ import java.util.concurrent.BlockingQueue; import 
java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.RejectedExecutionHandler; -import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -39,7 +36,6 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.executor.EventHandler.EventHandlerListener; -import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.monitoring.ThreadMonitoring; import com.google.common.collect.Lists; @@ -125,6 +121,7 @@ public class ExecutorService { return ExecutorType.MASTER_OPEN_REGION; case RS_ZK_REGION_SPLIT: + case RS_ZK_REGION_MERGE: case M_SERVER_SHUTDOWN: return ExecutorType.MASTER_SERVER_OPERATIONS; @@ -138,6 +135,7 @@ public class ExecutorService { case C_M_CREATE_TABLE: case C_M_SNAPSHOT_TABLE: case C_M_RESTORE_SNAPSHOT: + case C_M_MERGE_REGION: return ExecutorType.MASTER_TABLE_OPERATIONS; // RegionServer executor services @@ -271,7 +269,7 @@ public class ExecutorService { } return ret; } - + /** * Executor instance. */ @@ -316,7 +314,7 @@ public class ExecutorService { } this.threadPoolExecutor.execute(event); } - + public String toString() { return getClass().getSimpleName() + "-" + id + "-" + name; } @@ -330,7 +328,7 @@ public class ExecutorService { } queuedEvents.add((EventHandler)r); } - + List running = Lists.newArrayList(); for (Map.Entry e : threadPoolExecutor.getRunningTasks().entrySet()) { @@ -341,18 +339,18 @@ public class ExecutorService { } running.add(new RunningEventStatus(e.getKey(), (EventHandler)r)); } - + return new ExecutorStatus(this, queuedEvents, running); } } - + /** * A subclass of ThreadPoolExecutor that keeps track of the Runnables that * are executing at any given point in time. 
*/ static class TrackingThreadPoolExecutor extends ThreadPoolExecutor { - private ConcurrentMap running = Maps.newConcurrentMap(); - + private ConcurrentMap running = Maps.newConcurrentMap(); + public TrackingThreadPoolExecutor(int corePoolSize, int maximumPoolSize, long keepAliveTime, TimeUnit unit, BlockingQueue workQueue) { super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue); @@ -370,7 +368,7 @@ public class ExecutorService { assert oldPut == null : "inconsistency for thread " + t; super.beforeExecute(t, r); } - + /** * @return a map of the threads currently running tasks * inside this executor. Each key is an active thread, @@ -401,7 +399,7 @@ public class ExecutorService { this.queuedEvents = queuedEvents; this.running = running; } - + /** * Dump a textual representation of the executor's status * to the given writer. diff --git a/src/main/java/org/apache/hadoop/hbase/io/Reference.java b/src/main/java/org/apache/hadoop/hbase/io/Reference.java index 99ecb7e..3996dc3 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/Reference.java +++ b/src/main/java/org/apache/hadoop/hbase/io/Reference.java @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.io.Writable; /** @@ -82,6 +81,22 @@ public class Reference implements Writable { } /** + * @param splitRow + * @return A {@link Reference} that points at the top half of an hfile + */ + public static Reference createTopReference(final byte [] splitRow) { + return new Reference(splitRow, Range.top); + } + + /** + * @param splitRow + * @return A {@link Reference} that points at the bottom half of an hfile + */ + public static Reference createBottomReference(final byte [] splitRow) { + return new Reference(splitRow, Range.bottom); + } + + /** * * @return Range */ diff --git 
a/src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java b/src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java index 5cd7a69..5e29c63 100644 --- a/src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java +++ b/src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java @@ -28,8 +28,8 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; -import org.apache.hadoop.hbase.security.TokenInfo; import org.apache.hadoop.hbase.security.KerberosInfo; +import org.apache.hadoop.hbase.security.TokenInfo; import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; import org.apache.hadoop.hbase.util.Pair; @@ -54,11 +54,11 @@ public interface HMasterInterface extends VersionedProtocol { // meant all HBase RPC was broke though only one of the three RPC Interfaces // had changed. This has since been undone. // 29: 4/3/2010 - changed ClusterStatus serialization - // 30: 3/20/2012 - HBASE-5589: Added offline method - + // 30: 3/20/2012 - HBASE-5589: Added offline method + // NOTE: Not bumped from 29 to maintain compatibility since this addition is // after the v0.92.0 releases this is applied to. This is not bumped for - // 0.94.0 to maintain rolling restart compatibility with 0.92.x. + // 0.94.0 to maintain rolling restart compatibility with 0.92.x. public static final long VERSION = 29L; /** @return true if master is available */ @@ -199,13 +199,13 @@ public interface HMasterInterface extends VersionedProtocol { /** * Assign a region to a server chosen at random. - * + * * @param regionName * Region to assign. Will use existing RegionPlan if one found. * @throws IOException */ public void assign(final byte[] regionName) throws IOException; - + /** * Unassign a region from current hosting regionserver. Region will then be * assigned to a regionserver chosen at random. 
Region could be reassigned @@ -305,4 +305,7 @@ public interface HMasterInterface extends VersionedProtocol { public boolean isRestoreSnapshotDone(final HSnapshotDescription request) throws IOException; + + public void dispatchMergingRegions(final byte[] encodedNameOfRegionA, + final byte[] encodedNameOfRegionB, final boolean forcible) throws IOException; } diff --git a/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java b/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java index 9886b3a..da8ab00 100644 --- a/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java +++ b/src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.client.Append; -import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Increment; @@ -37,6 +36,7 @@ import org.apache.hadoop.hbase.client.MultiAction; import org.apache.hadoop.hbase.client.MultiResponse; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; @@ -44,14 +44,12 @@ import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.WritableByteArrayComparable; import org.apache.hadoop.hbase.io.hfile.BlockCacheColumnFamilySummary; import org.apache.hadoop.hbase.regionserver.RegionOpeningState; -import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest.CompactionState; import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; import 
org.apache.hadoop.hbase.regionserver.wal.HLog; -import org.apache.hadoop.hbase.security.TokenInfo; import org.apache.hadoop.hbase.security.KerberosInfo; +import org.apache.hadoop.hbase.security.TokenInfo; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.hbase.ipc.VersionedProtocol; /** * Clients interact with HRegionServers using a handle to the HRegionInterface. @@ -383,7 +381,7 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl /** * Atomically bulk load multiple HFiles (say from different column families) * into an open region. - * + * * @param familyPaths List of (family, hfile path) pairs * @param regionName name of region to load hfiles into * @return true if successful, false if failed recoverably @@ -396,12 +394,12 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl /** * Opens the specified region. - * + * * @param region * region to open - * @return RegionOpeningState + * @return RegionOpeningState * OPENED - if region open request was successful. - * ALREADY_OPENED - if the region was already opened. + * ALREADY_OPENED - if the region was already opened. * FAILED_OPENING - if region opening failed. * * @throws IOException @@ -415,15 +413,15 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl * @param versionOfOfflineNode * the version of znode to compare when RS transitions the znode from * OFFLINE state. - * @return RegionOpeningState + * @return RegionOpeningState * OPENED - if region open request was successful. - * ALREADY_OPENED - if the region was already opened. + * ALREADY_OPENED - if the region was already opened. * FAILED_OPENING - if region opening failed. * @throws IOException */ public RegionOpeningState openRegion(HRegionInfo region, int versionOfOfflineNode) throws IOException; - + /** * Opens the specified regions. 
* @param regions regions to open @@ -463,12 +461,12 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl */ public boolean closeRegion(final HRegionInfo region, final boolean zk) throws IOException; - + /** * Closes the region in the RS with the specified encoded regionName and will * use or not use ZK during the close according to the specified flag. Note * that the encoded region name is in byte format. - * + * * @param encodedRegionName * in bytes * @param zk @@ -521,6 +519,17 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl throws NotServingRegionException, IOException; /** + * Merge two regions. Asynchronous operation. + * @param regionInfoA HRI for region a + * @param regionInfoB HRI for region b + * @param forcible true if do a compulsory merge, otherwise we will only merge + * two adjacent regions + * @throws IOException + */ + void mergeRegions(final HRegionInfo regionInfoA, + final HRegionInfo regionInfoB, final boolean forcible) throws IOException; + + /** * Compacts the specified region. Performs a major compaction if specified. *

* This method is asynchronous. @@ -545,7 +554,7 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl */ void compactRegion(HRegionInfo regionInfo, boolean major, byte[] columnFamily) throws NotServingRegionException, IOException; - + /** * Replicates the given entries. The guarantee is that the given entries * will be durable on the slave cluster if this method returns without @@ -615,24 +624,24 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl final byte[] family, final byte[] qualifier, final CompareOp compareOp, final WritableByteArrayComparable comparator, final Delete delete) throws IOException; - + /** * Performs a BlockCache summary and returns a List of BlockCacheColumnFamilySummary objects. * This method could be fairly heavyweight in that it evaluates the entire HBase file-system - * against what is in the RegionServer BlockCache. - * + * against what is in the RegionServer BlockCache. + * * @return BlockCacheColumnFamilySummary * @throws IOException exception */ public List getBlockCacheColumnFamilySummaries() throws IOException; /** * Roll the log writer. That is, start writing log messages to a new file. - * + * * @throws IOException * @throws FailedLogCloseException * @return If lots of logs, flush the returned regions so next time through * we can clean logs. Returns null if nothing to flush. 
Names are actual - * region names as returned by {@link HRegionInfo#getEncodedName()} + * region names as returned by {@link HRegionInfo#getEncodedName()} */ public byte[][] rollHLogWriter() throws IOException, FailedLogCloseException; diff --git a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 3c9973e..cafe317 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.master.AssignmentManager.RegionState.State; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; +import org.apache.hadoop.hbase.master.handler.MergedRegionHandler; import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.SplitRegionHandler; @@ -160,7 +161,7 @@ public class AssignmentManager extends ZooKeeperListener { * Contains the server which need to update timer, these servers will be * handled by {@link TimerUpdater} */ - private final ConcurrentSkipListSet serversInUpdatingTimer = + private final ConcurrentSkipListSet serversInUpdatingTimer = new ConcurrentSkipListSet(); /** @@ -177,7 +178,7 @@ public class AssignmentManager extends ZooKeeperListener { //Thread pool executor service for timeout monitor private java.util.concurrent.ExecutorService threadPoolExecutorService; - + private List ignoreStatesRSOffline = Arrays.asList(new EventType[]{ EventType.RS_ZK_REGION_FAILED_OPEN, EventType.RS_ZK_REGION_CLOSED }); @@ -187,8 +188,8 @@ public class AssignmentManager extends ZooKeeperListener { */ private volatile boolean failover = false; - // Set holding all the regions which got processed 
while RIT was not - // populated during master failover. + // Set holding all the regions which got processed while RIT was not + // populated during master failover. private Map failoverProcessedRegions = new HashMap(); @@ -200,7 +201,7 @@ public class AssignmentManager extends ZooKeeperListener { * @param catalogTracker * @param service * @throws KeeperException - * @throws IOException + * @throws IOException */ public AssignmentManager(Server master, ServerManager serverManager, CatalogTracker catalogTracker, final LoadBalancer balancer, @@ -227,7 +228,7 @@ public class AssignmentManager extends ZooKeeperListener { this.balancer = balancer; this.threadPoolExecutorService = Executors.newCachedThreadPool(); } - + void startTimeOutMonitor() { Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), master.getServerName() + ".timeoutMonitor"); @@ -287,7 +288,7 @@ public class AssignmentManager extends ZooKeeperListener { /** * Gives enabling table regions. - * + * * @param tableName * @return list of regionInfos */ @@ -297,8 +298,8 @@ public class AssignmentManager extends ZooKeeperListener { /** * Add a regionPlan for the specified region. - * @param encodedName - * @param plan + * @param encodedName + * @param plan */ public void addPlan(String encodedName, RegionPlan plan) { synchronized (regionPlans) { @@ -407,7 +408,7 @@ public class AssignmentManager extends ZooKeeperListener { /** * Process all regions that are in transition in zookeeper and also - * processes the list of dead servers by scanning the META. + * processes the list of dead servers by scanning the META. * Used by master joining an cluster. * @param deadServers * Map of dead servers and their regions. Can be null. 
@@ -420,7 +421,7 @@ public class AssignmentManager extends ZooKeeperListener { throws KeeperException, IOException, InterruptedException { List nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher, watcher.assignmentZNode); - + if (nodes == null) { String errorMessage = "Failed to get the children from ZK"; master.abort(errorMessage, new IOException(errorMessage)); @@ -505,7 +506,7 @@ public class AssignmentManager extends ZooKeeperListener { * up in zookeeper. * @param encodedRegionName Region to process failover for. * @param regionInfo If null we'll go get it from meta table. - * @param deadServers Can be null + * @param deadServers Can be null * @return True if we processed regionInfo as a RIT. * @throws KeeperException * @throws IOException @@ -520,7 +521,7 @@ public class AssignmentManager extends ZooKeeperListener { if (data == null) return false; HRegionInfo hri = regionInfo; if (hri == null) { - if ((hri = getHRegionInfo(data)) == null) return false; + if ((hri = getHRegionInfo(data)) == null) return false; } processRegionsInTransition(data, hri, deadServers, stat.getVersion()); return true; @@ -641,7 +642,7 @@ public class AssignmentManager extends ZooKeeperListener { } } } - + /** * Put the region hri into an offline state up in zk. @@ -794,6 +795,34 @@ public class AssignmentManager extends ZooKeeperListener { regionState.getRegion(), sn, daughters)); break; + case RS_ZK_REGION_MERGING: + // Merged region is a new region, we can't find it in the region states now. + // Do nothing. + break; + + case RS_ZK_REGION_MERGE: + // Assert that we can get a serverinfo for this server. + if (!this.serverManager.isServerOnline(sn)) { + LOG.error("Dropped merge! ServerName=" + sn + " unknown."); + break; + } + // Get merged and merging regions. + byte[] payloadOfMerge = data.getPayload(); + List mergeRegions; + try { + mergeRegions = Writables.getHRegionInfos(payloadOfMerge, 0, + payloadOfMerge.length); + } catch (IOException e) { + LOG.error("Dropped merge! 
Failed reading merge payload for " + + prettyPrintedRegionName); + break; + } + assert mergeRegions.size() == 3; + // Run handler to do the rest of the MERGE handling. + this.executorService.submit(new MergedRegionHandler(master, this, sn, + mergeRegions)); + break; + case M_ZK_REGION_CLOSING: hri = checkIfInFailover(regionState, encodedName, data); if (hri != null) { @@ -848,7 +877,7 @@ public class AssignmentManager extends ZooKeeperListener { this.executorService.submit(new ClosedRegionHandler(master, this, regionState.getRegion())); break; - + case RS_ZK_REGION_FAILED_OPEN: hri = checkIfInFailover(regionState, encodedName, data); if (hri != null) { @@ -870,7 +899,7 @@ public class AssignmentManager extends ZooKeeperListener { // Handle this the same as if it were opened and then closed. regionState.update(RegionState.State.CLOSED, data.getStamp(), data.getOrigin()); - // When there are more than one region server a new RS is selected as the + // When there are more than one region server a new RS is selected as the // destination and the same is updated in the regionplan. 
(HBASE-5546) getRegionPlan(regionState, sn, true); this.executorService.submit(new ClosedRegionHandler(master, @@ -878,7 +907,7 @@ public class AssignmentManager extends ZooKeeperListener { break; case RS_ZK_REGION_OPENING: - hri = checkIfInFailover(regionState, encodedName, data); + hri = checkIfInFailover(regionState, encodedName, data); if (hri != null) { regionState = new RegionState(hri, RegionState.State.OPENING, data .getStamp(), data.getOrigin()); @@ -950,11 +979,11 @@ public class AssignmentManager extends ZooKeeperListener { } return null; } - + /** * Gets the HRegionInfo from the META table * @param data - * @return HRegionInfo hri for the region + * @return HRegionInfo hri for the region */ private HRegionInfo getHRegionInfo(RegionTransitionData data) { Pair p = null; @@ -1255,13 +1284,13 @@ public class AssignmentManager extends ZooKeeperListener { ServerName oldSn = this.regions.get(regionInfo); if (oldSn != null) LOG.warn("Overwriting " + regionInfo.getEncodedName() + " on " + oldSn + " with " + sn); - + if (isServerOnline(sn)) { this.regions.put(regionInfo, sn); addToServers(sn, regionInfo); this.regions.notifyAll(); } else { - LOG.info("The server is not in online servers, ServerName=" + + LOG.info("The server is not in online servers, ServerName=" + sn.getServerName() + ", region=" + regionInfo.getEncodedName()); } } @@ -1408,7 +1437,7 @@ public class AssignmentManager extends ZooKeeperListener { */ public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan, boolean hijack) { - // If hijack is true do not call disableRegionIfInRIT as + // If hijack is true do not call disableRegionIfInRIT as // we have not yet moved the znode to OFFLINE state. if (!hijack && isDisabledorDisablingRegionInRIT(region)) { return; @@ -1452,7 +1481,7 @@ public class AssignmentManager extends ZooKeeperListener { destination)); } this.addPlans(plans); - + // Presumption is that only this thread will be updating the state at this // time; i.e. 
handlers on backend won't be trying to set it to OPEN, etc. AtomicInteger counter = new AtomicInteger(0); @@ -1675,11 +1704,11 @@ public class AssignmentManager extends ZooKeeperListener { } } } - + if (setOfflineInZK && versionOfOfflineNode == -1) { return; } - + if (this.master.isStopped()) { LOG.debug("Server stopped; skipping assign of " + state); return; @@ -1748,7 +1777,7 @@ public class AssignmentManager extends ZooKeeperListener { } } } - if (t instanceof java.net.SocketTimeoutException + if (t instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(plan.getDestination())) { LOG.warn("Call openRegion() to " + plan.getDestination() + " has timed out when trying to assign " @@ -1801,38 +1830,38 @@ public class AssignmentManager extends ZooKeeperListener { /** * Set region as OFFLINED up in zookeeper - * + * * @param state * @param hijack * - true if needs to be hijacked and reassigned, false otherwise. - * @param regionAlreadyInTransitionException - * - true if we need to retry assignment because of RegionAlreadyInTransitionException. + * @param regionAlreadyInTransitionException + * - true if we need to retry assignment because of RegionAlreadyInTransitionException. * @return the version of the offline node if setting of the OFFLINE node was * successful, -1 otherwise. */ int setOfflineInZooKeeper(final RegionState state, boolean hijack, boolean regionAlreadyInTransitionException) { // In case of reassignment the current state in memory need not be - // OFFLINE. + // OFFLINE. if (!hijack && !state.isClosed() && !state.isOffline()) { if (!regionAlreadyInTransitionException ) { String msg = "Unexpected state : " + state + " .. 
Cannot transit it to OFFLINE."; this.master.abort(msg, new IllegalStateException(msg)); return -1; - } + } LOG.debug("Unexpected state : " + state + " but retrying to assign because RegionAlreadyInTransitionException."); } boolean allowZNodeCreation = false; // Under reassignment if the current state is PENDING_OPEN // or OPENING then refresh the in-memory state to PENDING_OPEN. This is - // important because if the region was in + // important because if the region was in // RS_OPENING state for a long time the master will try to force the znode // to OFFLINE state meanwhile the RS could have opened the corresponding // region and the state in znode will be RS_ZK_REGION_OPENED. // For all other cases we can change the in-memory state to OFFLINE. if (hijack && - (state.getState().equals(RegionState.State.PENDING_OPEN) || + (state.getState().equals(RegionState.State.PENDING_OPEN) || state.getState().equals(RegionState.State.OPENING))) { state.update(RegionState.State.PENDING_OPEN); allowZNodeCreation = false; @@ -1843,7 +1872,7 @@ public class AssignmentManager extends ZooKeeperListener { int versionOfOfflineNode = -1; try { // get the version after setting the znode to OFFLINE - versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(master.getZooKeeper(), + versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(master.getZooKeeper(), state.getRegion(), this.master.getServerName(), hijack, allowZNodeCreation); if (versionOfOfflineNode == -1) { @@ -1879,7 +1908,7 @@ public class AssignmentManager extends ZooKeeperListener { } catch (KeeperException e) { if (e instanceof NodeExistsException) { LOG.warn("Node for " + state.getRegion() + " already exists"); - } else { + } else { master.abort("Unexpected ZK exception creating/setting node OFFLINE", e); } return false; @@ -2090,9 +2119,9 @@ public class AssignmentManager extends ZooKeeperListener { NodeExistsException nee = (NodeExistsException)e; String path = nee.getPath(); try { - if (isSplitOrSplitting(path)) { - 
LOG.debug(path + " is SPLIT or SPLITTING; " + - "skipping unassign because region no longer exists -- its split"); + if (isSplitOrSplittingOrMergeOrMerging(path)) { + LOG.debug(path + " is SPLIT or SPLITTING or MERGE or MERGING; " + + "skipping unassign because region no longer exists -- its split or merge"); return; } } catch (KeeperException.NoNodeException ke) { @@ -2112,8 +2141,8 @@ public class AssignmentManager extends ZooKeeperListener { state = new RegionState(region, RegionState.State.PENDING_CLOSE); regionsInTransition.put(encodedName, state); } else if (force && (state.isPendingClose() || state.isClosing())) { - LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() + - " which is already " + state.getState() + + LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() + + " which is already " + state.getState() + " but forcing to send a CLOSE RPC again "); state.update(state.getState()); } else { @@ -2122,7 +2151,7 @@ public class AssignmentManager extends ZooKeeperListener { "already in transition (" + state.getState() + ", force=" + force + ")"); return; } - } + } // Send CLOSE RPC ServerName server = null; synchronized (this.regions) { @@ -2196,9 +2225,9 @@ public class AssignmentManager extends ZooKeeperListener { // Presume retry or server will expire. } } - + /** - * + * * @param region regioninfo of znode to be deleted. */ public void deleteClosingOrClosedNode(HRegionInfo region) { @@ -2227,18 +2256,20 @@ public class AssignmentManager extends ZooKeeperListener { /** * @param path - * @return True if znode is in SPLIT or SPLITTING state. + * @return True if znode is in SPLIT or SPLITTING or MERGE or MERGING state. * @throws KeeperException Can happen if the znode went away in meantime. 
*/ - private boolean isSplitOrSplitting(final String path) throws KeeperException { + private boolean isSplitOrSplittingOrMergeOrMerging(final String path) throws KeeperException { boolean result = false; - // This may fail if the SPLIT or SPLITTING znode gets cleaned up before we - // can get data from it. + // This may fail if the SPLIT or SPLITTING or MERGE or MERGING znode gets + // cleaned up before we can get data from it. RegionTransitionData data = ZKAssign.getData(master.getZooKeeper(), path); EventType evt = data.getEventType(); switch (evt) { case RS_ZK_REGION_SPLIT: case RS_ZK_REGION_SPLITTING: + case RS_ZK_REGION_MERGE: + case RS_ZK_REGION_MERGING: result = true; break; default: @@ -2297,7 +2328,7 @@ public class AssignmentManager extends ZooKeeperListener { /** * Assigns all user regions to online servers. Use round-robin assignment. - * + * * @param regions * @throws IOException * @throws InterruptedException @@ -2338,7 +2369,7 @@ public class AssignmentManager extends ZooKeeperListener { boolean isTableEnabled = this.zkTable.isEnabledTable(tableName); if (!isTableEnabled) { setEnabledTable(tableName); - } + } } /** @@ -2582,7 +2613,7 @@ public class AssignmentManager extends ZooKeeperListener { // Region assignment from META List results = MetaReader.fullScan(this.catalogTracker); // Get any new but slow to checkin region server that joined the cluster - Set onlineServers = serverManager.getOnlineServers().keySet(); + Set onlineServers = serverManager.getOnlineServers().keySet(); // Map of offline servers and their regions to be returned Map>> offlineServers = new TreeMap>>(); @@ -2639,7 +2670,7 @@ public class AssignmentManager extends ZooKeeperListener { byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat); // If znode does not exist dont consider this region if (data == null) { - LOG.debug("Region "+ regionInfo.getRegionNameAsString() + " split is completed. 
" + LOG.debug("Region "+ regionInfo.getRegionNameAsString() + " split is completed. " + "Hence need not add to regions list"); continue; } @@ -2678,7 +2709,7 @@ public class AssignmentManager extends ZooKeeperListener { hris.add(regionInfo); } } - + private void enableTableIfNotDisabledOrDisablingOrEnabling(boolean disabled, boolean disablingOrEnabling, String tableName) { if (!disabled && !disablingOrEnabling @@ -2695,16 +2726,16 @@ public class AssignmentManager extends ZooKeeperListener { } else if (checkIfRegionsBelongsToEnabling(regionInfo)) { if (!this.enablingTables.containsKey(tableName)) { this.enablingTables.put(tableName, new ArrayList()); - } + } return true; - } + } return false; } /** * Recover the tables that were not fully moved to DISABLED state. These * tables are in DISABLING state when the master restarted/switched. - * + * * @param disablingTables * @return * @throws KeeperException @@ -2734,7 +2765,7 @@ public class AssignmentManager extends ZooKeeperListener { /** * Recover the tables that are not fully moved to ENABLED state. These tables * are in ENABLING state when the master restarted/switched - * + * * @param enablingTables * @param isWatcherCreated * @throws KeeperException @@ -2781,10 +2812,10 @@ public class AssignmentManager extends ZooKeeperListener { * Processes list of dead servers from result of META scan and regions in RIT *

* This is used for failover to recover the lost regions that belonged to - * RegionServers which failed while there was no active master or regions + * RegionServers which failed while there was no active master or regions * that were in RIT. *

- * + * * @param deadServers * The list of dead servers which failed while there was no active * master. Can be null. @@ -2798,7 +2829,7 @@ public class AssignmentManager extends ZooKeeperListener { List nodes) throws IOException, KeeperException { if (null != deadServers) { Set actualDeadServers = this.serverManager.getDeadServers(); - for (Map.Entry>> deadServer : + for (Map.Entry>> deadServer : deadServers.entrySet()) { // skip regions of dead servers because SSH will process regions during rs expiration. // see HBASE-5916 @@ -2822,7 +2853,7 @@ public class AssignmentManager extends ZooKeeperListener { // we consider that this region is being handled. // So we should skip it and process it in // processRegionsInTransition. - if (data != null && data.getOrigin() != null && + if (data != null && data.getOrigin() != null && serverManager.isServerOnline(data.getOrigin())) { LOG.info("The region " + regionInfo.getEncodedName() + "is being handled on " + data.getOrigin()); @@ -2958,7 +2989,7 @@ public class AssignmentManager extends ZooKeeperListener { */ public List getRegionsOfTable(byte[] tableName) { List tableRegions = new ArrayList(); - // boundary needs to have table's name but regionID 0 so that it is sorted + // boundary needs to have table's name but regionID 0 so that it is sorted // before all table's regions. HRegionInfo boundary = new HRegionInfo(tableName, null, null, false, 0L); @@ -3121,7 +3152,7 @@ public class AssignmentManager extends ZooKeeperListener { } } } - + private void processOpeningState(HRegionInfo regionInfo) { LOG.info("Region has been OPENING for too " + "long, reassigning region=" + regionInfo.getRegionNameAsString()); @@ -3294,7 +3325,7 @@ public class AssignmentManager extends ZooKeeperListener { * Can't let out original since it can change and at least the loadbalancer * wants to iterate this exported list. We need to synchronize on regions * since all access to this.servers is under a lock on this.regions. 
- * + * * @return A clone of current assignments by table. */ Map>> getAssignmentsByTable() { @@ -3337,7 +3368,7 @@ public class AssignmentManager extends ZooKeeperListener { } return result; } - + /** * @return A clone of current assignments. Note, this is assignments only. * If a new server has come in and it has no regions, it will not be included @@ -3376,9 +3407,31 @@ public class AssignmentManager extends ZooKeeperListener { } /** + * Update inmemory structures. + * @param sn Server that reported the merge + * @param merged regioninfo of merged + * @param a region a + * @param b region b + */ + public void handleRegionsMergeReport(final ServerName sn, + final HRegionInfo merged, final HRegionInfo a, final HRegionInfo b) { + regionOffline(a); + regionOffline(b); + regionOnline(merged, sn); + + // There's a possibility that the region was merging while a user asked + // the master to disable, we need to make sure we close those regions in + // that case. This is not racing with the region server itself since RS + // report is done after the regions merge transaction completed. + if (this.zkTable.isDisablingOrDisabledTable(merged.getTableNameAsString())) { + unassign(merged); + } + } + + /** * @param plan Plan to execute. 
*/ - void balance(final RegionPlan plan) { + public void balance(final RegionPlan plan) { synchronized (this.regionPlans) { this.regionPlans.put(plan.getRegionName(), plan); } @@ -3507,7 +3560,7 @@ public class AssignmentManager extends ZooKeeperListener { public boolean isSplitting() { return state == State.SPLITTING; } - + public boolean isSplit() { return state == State.SPLIT; } @@ -3521,12 +3574,12 @@ public class AssignmentManager extends ZooKeeperListener { } /** - * A slower (but more easy-to-read) stringification + * A slower (but more easy-to-read) stringification */ public String toDescriptiveString() { long lstamp = stamp.get(); long relTime = System.currentTimeMillis() - lstamp; - + return region.getRegionNameAsString() + " state=" + state + ", ts=" + new Date(lstamp) + " (" + (relTime/1000) + "s ago)" @@ -3553,10 +3606,10 @@ public class AssignmentManager extends ZooKeeperListener { this.timeoutMonitor.interrupt(); this.timerUpdater.interrupt(); } - + /** * Check whether the RegionServer is online. - * @param serverName + * @param serverName * @return True if online. */ public boolean isServerOnline(ServerName serverName) { diff --git a/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 94ff5bc..5e42d9a 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -48,13 +48,14 @@ import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; import org.apache.hadoop.hbase.util.Writables; /** * A janitor for the catalog tables. Scans the .META. catalog * table on a period looking for unused regions to garbage collect. 
*/ -class CatalogJanitor extends Chore { +public class CatalogJanitor extends Chore { private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName()); private final Server server; private final MasterServices services; @@ -96,16 +97,38 @@ class CatalogJanitor extends Chore { } /** - * Scans META and returns a number of scanned rows, and - * an ordered map of split parents. + * Scans META and returns a number of scanned rows, and a map of merged + * regions, and an ordered map of split parents. + * @return triple of scanned rows, map of merged regions and map of split + * parent regioninfos + * @throws IOException */ - Pair> getSplitParents() throws IOException { - // TODO: Only works with single .META. region currently. Fix. + Triple, Map> getMergedRegionsAndSplitParents() + throws IOException { + return getMergedRegionsAndSplitParents(null); + } + + /** + * Scans META and returns a number of scanned rows, and a map of merged + * regions, and an ordered map of split parents. if the given table name is + * null, return merged regions and split parents of all tables, else only the + * specified table + * @param tableName null represents all tables + * @return triple of scanned rows, and map of merged regions, and map of split + * parent regioninfos + * @throws IOException + */ + Triple, Map> getMergedRegionsAndSplitParents( + final byte[] tableName) throws IOException { + final boolean isTableSpecified = (tableName != null && tableName.length != 0); + + // TODO: Only works with single .META. region currently. Fix. final AtomicInteger count = new AtomicInteger(0); // Keep Map of found split parents. There are candidates for cleanup. // Use a comparator that has split parents come before its daughters. final Map splitParents = new TreeMap(new SplitParentFirstComparator()); + final Map mergedRegions = new TreeMap(); // This visitor collects split parents and counts rows in the .META. 
table MetaReader.Visitor visitor = new MetaReader.Visitor() { @Override @@ -114,33 +137,104 @@ class CatalogJanitor extends Chore { count.incrementAndGet(); HRegionInfo info = getHRegionInfo(r); if (info == null) return true; // Keep scanning + if (isTableSpecified && Bytes.compareTo(info.getTableName(), tableName) > 0) { + //Another table, stop scanning + return false; + } + if (info.isSplitParent()) splitParents.put(info, r); + if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) { + mergedRegions.put(info, r); + } // Returning true means "keep scanning" return true; } }; - // Run full scan of .META. catalog table passing in our custom visitor - MetaReader.fullScan(this.server.getCatalogTracker(), visitor); - return new Pair>(count.get(), splitParents); + byte[] startRow = (!isTableSpecified) ? HConstants.EMPTY_START_ROW : HRegionInfo + .createRegionName(tableName, HConstants.EMPTY_START_ROW, HConstants.ZEROES, false); + // Run full scan of .META. catalog table passing in our custom visitor with + // the start row + MetaReader.fullScan(this.server.getCatalogTracker(), visitor, startRow); + + return new Triple, Map>(count.get(), + mergedRegions, splitParents); + } + + /** + * If merged region no longer holds reference to the merge regions, archive + * merge region on hdfs and perform deleting references in .META. + * @param mergedRegion + * @param regionA + * @param regionB + * @return true if we delete references in merged region on .META. 
and archive + * the files on the file system + * @throws IOException + */ + boolean cleanMergeRegion(final HRegionInfo mergedRegion, + final HRegionInfo regionA, final HRegionInfo regionB) throws IOException { + + Pair p = checkRegionInFs(mergedRegion); + if (!p.getFirst()) { + LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName()); + } + if (hasNoReferences(p)) { + LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and " + + regionB.getRegionNameAsString() + + " from fs because merged region no longer holds references"); + FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); + HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA); + HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB); + MetaEditor.deleteMergeQualifiers(server.getCatalogTracker(), mergedRegion); + return true; + } + + return false; } /** * Run janitorial scan of catalog .META. table looking for * garbage to collect. + * @return number of cleaned regions * @throws IOException */ int scan() throws IOException { - Pair> pair = getSplitParents(); - int count = pair.getFirst(); - Map splitParents = pair.getSecond(); + Triple, Map> scanTriple = + getMergedRegionsAndSplitParents(); + int count = scanTriple.getFirst(); + /** + * clean merge regions first + */ + int mergeCleaned = 0; + Map mergedRegions = scanTriple.getSecond(); + for (Map.Entry e : mergedRegions.entrySet()) { + HRegionInfo regionA = MetaReader.parseHRegionInfoFromCatalogResult(e.getValue(), + HConstants.MERGEA_QUALIFIER); + HRegionInfo regionB = MetaReader.parseHRegionInfoFromCatalogResult(e.getValue(), + HConstants.MERGEB_QUALIFIER); + if (regionA == null || regionB == null) { + LOG.warn("Unexpected references regionA=" + + (regionA == null ? "null" : regionA.getRegionNameAsString()) + + ",regionB=" + + (regionB == null ? 
"null" : regionB.getRegionNameAsString()) + + " in merged region " + e.getKey().getRegionNameAsString()); + } else { + if (cleanMergeRegion(e.getKey(), regionA, regionB)) { + mergeCleaned++; + } + } + } + /** + * clean split parents + */ + Map splitParents = scanTriple.getThird(); // Now work on our list of found parents. See if any we can clean up. - int cleaned = 0; + int splitCleaned = 0; HashSet parentNotCleaned = new HashSet(); //regions whose parents are still around for (Map.Entry e : splitParents.entrySet()) { if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && cleanParent(e.getKey(), e.getValue())) { - cleaned++; + splitCleaned++; } else { // We could not clean the parent, so it's daughters should not be cleaned either (HBASE-6160) parentNotCleaned.add(getDaughterRegionInfo( @@ -149,14 +243,16 @@ class CatalogJanitor extends Chore { e.getValue(), HConstants.SPLITB_QUALIFIER).getEncodedName()); } } - if (cleaned != 0) { - LOG.info("Scanned " + count + " catalog row(s) and gc'd " + cleaned + - " unreferenced parent region(s)"); + if ((mergeCleaned + splitCleaned) != 0) { + LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned + + " unreferenced merged region(s) and " + splitCleaned + + " unreferenced parent region(s)"); } else if (LOG.isDebugEnabled()) { - LOG.debug("Scanned " + count + " catalog row(s) and gc'd " + cleaned + - " unreferenced parent region(s)"); + LOG.debug("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned + + " unreferenced merged region(s) and " + splitCleaned + + " unreferenced parent region(s)"); } - return cleaned; + return mergeCleaned + splitCleaned; } /** @@ -225,13 +321,21 @@ class CatalogJanitor extends Chore { boolean cleanParent(final HRegionInfo parent, Result rowContent) throws IOException { boolean result = false; + // Check whether it is a merged region and not clean reference + // No necessary to check MERGEB_QUALIFIER because these two qualifiers will + // be inserted/deleted together + 
if (rowContent.getValue(HConstants.CATALOG_FAMILY, + HConstants.MERGEA_QUALIFIER) != null) { + // wait cleaning merge region first + return result; + } // Run checks on each daughter split. HRegionInfo a_region = getDaughterRegionInfo(rowContent, HConstants.SPLITA_QUALIFIER); HRegionInfo b_region = getDaughterRegionInfo(rowContent, HConstants.SPLITB_QUALIFIER); Pair a = - checkDaughterInFs(parent, a_region, HConstants.SPLITA_QUALIFIER); + checkRegionInFs(a_region); Pair b = - checkDaughterInFs(parent, b_region, HConstants.SPLITB_QUALIFIER); + checkRegionInFs(b_region); if (hasNoReferences(a) && hasNoReferences(b)) { LOG.debug("Deleting region " + parent.getRegionNameAsString() + " because daughter splits no longer hold references"); @@ -291,17 +395,13 @@ class CatalogJanitor extends Chore { /** * Checks if a daughter region -- either splitA or splitB -- still holds * references to parent. - * @param parent Parent region name. - * @param split Which column family. - * @param qualifier Which of the daughters to look at, splitA or splitB. - * @return A pair where the first boolean says whether or not the daughter + * @param split the region name + * @return A pair where the first boolean says whether or not the region * region directory exists in the filesystem and then the second boolean says - * whether the daughter has references to the parent. + * whether the region has references. 
* @throws IOException */ - Pair checkDaughterInFs(final HRegionInfo parent, - final HRegionInfo split, - final byte [] qualifier) + Pair checkRegionInFs(final HRegionInfo split) throws IOException { boolean references = false; boolean exists = false; @@ -317,7 +417,7 @@ class CatalogJanitor extends Chore { LOG.warn("Daughter regiondir does not exist: " + regiondir.toString()); return new Pair(exists, Boolean.FALSE); } - HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName()); + HTableDescriptor parentDescriptor = getTableDescriptor(split.getTableName()); for (HColumnDescriptor family: parentDescriptor.getFamilies()) { Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(), @@ -345,4 +445,33 @@ class CatalogJanitor extends Chore { throws FileNotFoundException, IOException { return this.services.getTableDescriptors().get(Bytes.toString(tableName)); } + + /** + * Checks if the specified region has merge qualifiers, if so, try to clean + * them + * @param region + * @return true if the specified region doesn't have merge qualifier now + * @throws IOException + */ + public boolean cleanMergeQualifier(final HRegionInfo region) + throws IOException { + // Get merge regions if it is a merged region and already has merge + // qualifier + Pair mergeRegions = MetaReader + .getRegionsFromMergeQualifier(this.services.getCatalogTracker(), + region.getRegionName()); + if (mergeRegions == null + || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) { + // It doesn't have merge qualifier, no need to clean + return true; + } + // It shouldn't happen, we must insert/delete these two qualifiers together + if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) { + LOG.error("Merged region " + region.getRegionNameAsString() + + " has only one merge qualifier in META."); + return false; + } + return cleanMergeRegion(region, mergeRegions.getFirst(), + mergeRegions.getSecond()); + } } diff --git 
a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index ff316dc..51113a0 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -85,6 +85,7 @@ import org.apache.hadoop.hbase.master.cleaner.LogCleaner; import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DeleteTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; +import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.ModifyTableHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; @@ -1227,6 +1228,38 @@ Server { } } + @Override + public void dispatchMergingRegions(final byte[] encodedNameOfRegionA, + final byte[] encodedNameOfRegionB, final boolean forcible) throws IOException { + //TODO: not sure if Regions are not assigned + Pair pA = + this.assignmentManager.getAssignment(encodedNameOfRegionA); + Pair pB = + this.assignmentManager.getAssignment(encodedNameOfRegionB); + if (pA == null || pB == null) { + throw new UnknownRegionException( + Bytes.toStringBinary(pA == null ? 
encodedNameOfRegionA + : encodedNameOfRegionB)); + } + + if (!forcible && !HRegionInfo.areAdjacent(pA.getFirst(), pB.getFirst())) { + throw new IOException("Unable to merge not adjacent regions " + + pA.getFirst().getRegionNameAsString() + ", " + + pB.getFirst().getRegionNameAsString() + + " where forcible = " + forcible); + } + + dispatchMergingRegions(pA.getFirst(), pB.getFirst(), forcible); + } + + @Override + public void dispatchMergingRegions(final HRegionInfo region_a, + final HRegionInfo region_b, final boolean forcible) throws IOException { + checkInitialized(); + this.executorService.submit(new DispatchMergingRegionHandler(this, + this.catalogJanitorChore, region_a, region_b, forcible)); + } + public void createTable(HTableDescriptor hTableDescriptor, byte [][] splitKeys) throws IOException { diff --git a/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 3eefdfc..3e9ac5e 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -22,9 +22,12 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.TableDescriptors; +import org.apache.hadoop.hbase.TableNotDisabledException; +import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; @@ -56,7 +59,7 @@ public interface MasterServices extends Server { * Check table is modifiable; i.e. exists and is offline. * @param tableName Name of table to check. 
* @throws TableNotDisabledException - * @throws TableNotFoundException + * @throws TableNotFoundException */ public void checkTableModifiable(final byte [] tableName) throws IOException; @@ -166,4 +169,16 @@ public interface MasterServices extends Server { */ public boolean registerProtocol( Class protocol, T handler); + + /** + * Merge two regions. The real implementation is on the regionserver, master + * just move the regions together and send MERGE RPC to regionserver + * @param region_a region to merge + * @param region_b region to merge + * @param forcible true if do a compulsory merge, otherwise we will only merge + * two adjacent regions + * @throws IOException + */ + public void dispatchMergingRegions(final HRegionInfo region_a, + final HRegionInfo region_b, final boolean forcible) throws IOException; } diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 0397020..f954fbc 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -211,8 +211,8 @@ public class ServerManager { } /** - * Checks if the clock skew between the server and the master. If the clock skew exceeds the - * configured max, it will throw an exception; if it exceeds the configured warning threshold, + * Checks if the clock skew between the server and the master. If the clock skew exceeds the + * configured max, it will throw an exception; if it exceeds the configured warning threshold, * it will log a warning but start normally. * @param serverName Incoming servers's name * @param serverCurrentTime @@ -229,7 +229,7 @@ public class ServerManager { throw new ClockOutOfSyncException(message); } else if (skew > warningSkew){ String message = "Reported time for server " + serverName + " is out of sync with master " + - "by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " + + "by " + skew + "ms. 
(Warning threshold is " + warningSkew + "ms; " + "error threshold is " + maxSkew + "ms)"; LOG.warn(message); } @@ -548,6 +548,37 @@ public class ServerManager { } /** + * Sends an MERGE REGIONS RPC to the specified server to merge the specified + * regions. + *

+ * A region server could reject the close request because it either does not + * have the specified region. + * @param server server to merge regions + * @param region_a region to merge + * @param region_b region to merge + * @param forcible true if do a compulsory merge, otherwise we will only merge + * two adjacent regions + * @throws IOException + */ + public void sendRegionsMerge(ServerName server, HRegionInfo region_a, + HRegionInfo region_b, boolean forcible) throws IOException { + if (server == null) + throw new NullPointerException("Passed server is null"); + if (region_a == null || region_b == null) + throw new NullPointerException("Passed region is null"); + + HRegionInterface hri = getServerConnection(server); + if (hri == null) { + throw new IOException("Attempting to send MERGE REGIONS RPC to server " + + server.toString() + " for region " + + region_a.getRegionNameAsString() + "," + + region_b.getRegionNameAsString() + + " failed because no RPC connection found to this server"); + } + hri.mergeRegions(region_a, region_b, forcible); + } + + /** * @param sn * @return * @throws IOException @@ -695,7 +726,7 @@ public class ServerManager { } } } - + /** * To clear any dead server with same host name and port of any online server */ diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/DispatchMergingRegionHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/DispatchMergingRegionHandler.java new file mode 100644 index 0000000..e547172 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/DispatchMergingRegionHandler.java @@ -0,0 +1,162 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. 
The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import java.io.IOException; +import java.io.InterruptedIOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HServerLoad.RegionLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.executor.EventHandler; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.master.CatalogJanitor; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; + +/** + * Handles MERGE regions request on master: move the regions together(on the + * same regionserver) and send MERGE RPC to regionserver. 
+ * + * NOTE:The real merge is executed on the regionserver + * + */ +@InterfaceAudience.Private +public class DispatchMergingRegionHandler extends EventHandler { + private static final Log LOG = LogFactory.getLog(DispatchMergingRegionHandler.class); + private final MasterServices masterServices; + private final CatalogJanitor catalogJanitor; + private HRegionInfo region_a; + private HRegionInfo region_b; + private final boolean forcible; + private final int timeout; + + public DispatchMergingRegionHandler(final MasterServices services, + final CatalogJanitor catalogJanitor, final HRegionInfo region_a, + final HRegionInfo region_b, final boolean forcible) { + super(services, EventType.C_M_MERGE_REGION); + this.masterServices = services; + this.catalogJanitor = catalogJanitor; + this.region_a = region_a; + this.region_b = region_b; + this.forcible = forcible; + this.timeout = server.getConfiguration().getInt( + "hbase.master.regionmerge.timeout", 30 * 1000); + } + + @Override + public void process() throws IOException { + boolean regionAHasMergeQualifier = !catalogJanitor.cleanMergeQualifier(region_a); + if (regionAHasMergeQualifier + || !catalogJanitor.cleanMergeQualifier(region_b)) { + LOG.info("Skip merging regions " + region_a.getRegionNameAsString() + + ", " + region_b.getRegionNameAsString() + ", because region " + + (regionAHasMergeQualifier ? region_a.getEncodedName() : region_b + .getEncodedName()) + " has merge qualifier"); + return; + } + + AssignmentManager am = masterServices.getAssignmentManager(); + ServerName region_a_location = am.getRegionServerOfRegion(region_a); + ServerName region_b_location = am.getRegionServerOfRegion(region_b); + if (region_a_location == null || region_b_location == null) { + LOG.info("Skip merging regions " + region_a.getRegionNameAsString() + + ", " + region_b.getRegionNameAsString() + ", because region " + + (region_a_location == null ? 
region_a.getEncodedName() : region_b + .getEncodedName()) + " is not online now"); + return; + } + long startTime = EnvironmentEdgeManager.currentTimeMillis(); + boolean onSameRS = region_a_location.equals(region_b_location); + + // Make sure regions are on the same regionserver before send merge + // regions request to regionserver + if (!onSameRS) { + // Move region_b to region a's location, switch region_a and region_b if + // region_a's load lower than region_b's, so we will always move lower + // load region + RegionLoad loadOfRegionA = masterServices.getServerManager() + .getLoad(region_a_location).getRegionsLoad() + .get(region_a.getRegionName()); + RegionLoad loadOfRegionB = masterServices.getServerManager() + .getLoad(region_b_location).getRegionsLoad() + .get(region_b.getRegionName()); + if (loadOfRegionA != null && loadOfRegionB != null + && loadOfRegionA.getRequestsCount() < loadOfRegionB + .getRequestsCount()) { + // switch region_a and region_b + HRegionInfo tmpRegion = this.region_a; + this.region_a = this.region_b; + this.region_b = tmpRegion; + ServerName tmpLocation = region_a_location; + region_a_location = region_b_location; + region_b_location = tmpLocation; + } + + RegionPlan regionPlan = new RegionPlan(region_b, region_b_location, + region_a_location); + masterServices.getAssignmentManager().balance(regionPlan); + while (!masterServices.isStopped()) { + try { + Thread.sleep(20); + region_b_location = masterServices.getAssignmentManager() + .getRegionServerOfRegion(region_b); + onSameRS = region_a_location.equals(region_b_location); + if (onSameRS || am.isRegionInTransition(region_b) == null) { + // Regions are on the same RS, or region_b is not in + // RegionInTransition any more + break; + } + if ((EnvironmentEdgeManager.currentTimeMillis() - startTime) > timeout) break; + } catch (InterruptedException e) { + InterruptedIOException iioe = new InterruptedIOException(); + iioe.initCause(e); + throw iioe; + } + } + } + + if (onSameRS) { + try{ 
+ masterServices.getServerManager().sendRegionsMerge(region_a_location, + region_a, region_b, forcible); + LOG.info("Successfully send MERGE REGIONS RPC to server " + + region_a_location.toString() + " for region " + + region_a.getRegionNameAsString() + "," + + region_b.getRegionNameAsString() + ", focible=" + forcible); + } catch (IOException ie) { + LOG.info("Failed send MERGE REGIONS RPC to server " + + region_a_location.toString() + " for region " + + region_a.getRegionNameAsString() + "," + + region_b.getRegionNameAsString() + ", focible=" + forcible + ", " + + ie.getMessage()); + } + } else { + LOG.info("Cancel merging regions " + region_a.getRegionNameAsString() + + ", " + region_b.getRegionNameAsString() + + ", because can't move them together after " + + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms"); + } + } + +} diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/MergedRegionHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/MergedRegionHandler.java new file mode 100644 index 0000000..e8898ad --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/MergedRegionHandler.java @@ -0,0 +1,116 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.executor.EventHandler; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; + +/** + * Handles MERGE regions event on Master, master receive the merge report from + * the regionserver, then offline the merging regions and online the merged + * region.Here region_a sorts before region_b. + */ +@InterfaceAudience.Private +public class MergedRegionHandler extends EventHandler implements + TotesHRegionInfo { + private static final Log LOG = LogFactory.getLog(MergedRegionHandler.class); + private final AssignmentManager assignmentManager; + private final HRegionInfo merged; + private final HRegionInfo region_a; + private final HRegionInfo region_b; + private final ServerName sn; + + public MergedRegionHandler(Server server, + AssignmentManager assignmentManager, ServerName sn, + final List mergeRegions) { + super(server, EventType.RS_ZK_REGION_MERGE); + assert mergeRegions.size() == 3; + this.assignmentManager = assignmentManager; + this.merged = mergeRegions.get(0); + this.region_a = mergeRegions.get(1); + this.region_b = mergeRegions.get(2); + this.sn = sn; + } + + @Override + public HRegionInfo getHRegionInfo() { + return this.merged; + } + + @Override + public String toString() { + String name = "UnknownServerName"; + if (server != null && server.getServerName() != 
null) { + name = server.getServerName().toString(); + } + String mergedRegion = "UnknownRegion"; + if (merged != null) { + mergedRegion = merged.getRegionNameAsString(); + } + return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + + mergedRegion; + } + + @Override + public void process() { + String encodedRegionName = this.merged.getEncodedName(); + LOG.debug("Handling MERGE event for " + encodedRegionName + + "; deleting node"); + + this.assignmentManager.handleRegionsMergeReport(this.sn, this.merged, + this.region_a, this.region_b); + // Remove region from ZK + try { + + boolean successful = false; + while (!successful) { + // It's possible that the RS tickles in between the reading of the + // znode and the deleting, so it's safe to retry. + successful = ZKAssign.deleteNode(this.server.getZooKeeper(), + encodedRegionName, EventType.RS_ZK_REGION_MERGE); + } + } catch (KeeperException e) { + if (e instanceof NoNodeException) { + String znodePath = ZKUtil.joinZNode( + this.server.getZooKeeper().splitLogZNode, encodedRegionName); + LOG.debug("The znode " + znodePath + + " does not exist. 
May be deleted already."); + } else { + server.abort("Error deleting MERGE node in ZK for transition ZK node (" + + merged.getEncodedName() + ")", e); + } + } + LOG.info("Handled MERGE event; merged=" + + this.merged.getRegionNameAsString() + " region_a=" + + this.region_a.getRegionNameAsString() + "region_b=" + + this.region_b.getRegionNameAsString()); + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java b/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java index e1ebe6d..2dbfcb3 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java @@ -52,6 +52,7 @@ public class CompactSplitThread implements CompactionRequestor { private final ThreadPoolExecutor largeCompactions; private final ThreadPoolExecutor smallCompactions; private final ThreadPoolExecutor splits; + private final ThreadPoolExecutor mergePool; /** * Splitting should not take place if the total number of regions exceed this. 
@@ -114,6 +115,16 @@ public class CompactSplitThread implements CompactionRequestor { return t; } }); + int mergeThreads = conf.getInt("hbase.regionserver.thread.merge", 1); + this.mergePool = (ThreadPoolExecutor) Executors.newFixedThreadPool( + mergeThreads, new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setName(n + "-merges-" + System.currentTimeMillis()); + return t; + } + }); } @Override @@ -121,7 +132,8 @@ public class CompactSplitThread implements CompactionRequestor { return "compaction_queue=(" + largeCompactions.getQueue().size() + ":" + smallCompactions.getQueue().size() + ")" - + ", split_queue=" + splits.getQueue().size(); + + ", split_queue=" + splits.getQueue().size() + + ", merge_queue=" + mergePool.getQueue().size(); } public String dumpQueue() { @@ -155,9 +167,32 @@ public class CompactSplitThread implements CompactionRequestor { queueLists.append("\n"); } + queueLists.append("\n"); + queueLists.append(" Region Merge Queue:\n"); + lq = mergePool.getQueue(); + it = lq.iterator(); + while (it.hasNext()) { + queueLists.append(" " + it.next().toString()); + queueLists.append("\n"); + } + return queueLists.toString(); } + public synchronized void requestRegionsMerge(final HRegion a, + final HRegion b, final boolean forcible) { + try { + mergePool.execute(new RegionMergeRequest(a, b, this.server, forcible)); + if (LOG.isDebugEnabled()) { + LOG.debug("Region merge requested for " + a + "," + b + ", forcible=" + + forcible + ". 
" + this); + } + } catch (RejectedExecutionException ree) { + LOG.warn("Could not execute merge for " + a + "," + b + ", forcible=" + + forcible, ree); + } + } + public synchronized boolean requestSplit(final HRegion r) { // don't split regions that are blocking if (shouldSplitRegion() && r.getCompactPriority() >= Store.PRIORITY_USER) { @@ -259,6 +294,7 @@ public class CompactSplitThread implements CompactionRequestor { */ void interruptIfNecessary() { splits.shutdown(); + mergePool.shutdown(); largeCompactions.shutdown(); smallCompactions.shutdown(); } @@ -280,6 +316,7 @@ public class CompactSplitThread implements CompactionRequestor { void join() { waitFor(splits, "Split Thread"); + waitFor(mergePool, "Merge Thread"); waitFor(largeCompactions, "Large Compaction Thread"); waitFor(smallCompactions, "Small Compaction Thread"); } diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 6eb5f5e..3c34ec8 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -62,8 +62,8 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -83,7 +83,6 @@ import org.apache.hadoop.hbase.RegionTooBusyException; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.client.Append; -import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import 
org.apache.hadoop.hbase.client.Increment; @@ -93,6 +92,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Row; import org.apache.hadoop.hbase.client.RowLock; +import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; @@ -103,6 +103,7 @@ import org.apache.hadoop.hbase.filter.FilterBase; import org.apache.hadoop.hbase.filter.IncompatibleFilterException; import org.apache.hadoop.hbase.filter.WritableByteArrayComparable; import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.io.TimeRange; import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.CacheConfig; @@ -465,7 +466,7 @@ public class HRegion implements HeapSize { // , Writable{ // When hbase.regionserver.optionallogflushinterval <= 0 , deferred log sync is disabled. this.deferredLogSyncDisabled = conf.getLong("hbase.regionserver.optionallogflushinterval", 1 * 1000) <= 0; - + if (rsServices != null) { this.rsAccounting = this.rsServices.getRegionServerAccounting(); // don't initialize coprocessors if not running within a regionserver @@ -618,7 +619,7 @@ public class HRegion implements HeapSize { // , Writable{ // these directories here on open. We may be opening a region that was // being split but we crashed in the middle of it all. SplitTransaction.cleanupAnySplitDetritus(this); - FSUtils.deleteDirectory(this.fs, new Path(regiondir, MERGEDIR)); + cleanupMergesDir(); this.writestate.setReadOnly(this.htableDescriptor.isReadOnly()); @@ -668,9 +669,8 @@ public class HRegion implements HeapSize { // , Writable{ */ public boolean hasReferences() { for (Store store : this.stores.values()) { - for (StoreFile sf : store.getStorefiles()) { - // Found a reference, return. 
- if (sf.isReference()) return true; + if (store.hasReferences()) { + return true; } } return false; @@ -888,6 +888,24 @@ public class HRegion implements HeapSize { // , Writable{ return isAvailable() && !hasReferences(); } + /** + * @return true if region is mergeable + */ + public boolean isMergeable() { + if (!isAvailable()) { + LOG.debug("Region " + this.getRegionNameAsString() + + " is not mergeable because it is closing or closed"); + return false; + } + if (hasReferences()) { + LOG.debug("Region " + this.getRegionNameAsString() + + " is not mergeable because it has references"); + return false; + } + + return true; + } + boolean areWritesEnabled() { synchronized(this.writestate) { return this.writestate.writesEnabled; @@ -1688,7 +1706,7 @@ public class HRegion implements HeapSize { // , Writable{ checkRow(row, "getClosestRowBefore"); startRegionOperation(); this.readRequestsCount.increment(); - this.opMetrics.setReadRequestCountMetrics(this.readRequestsCount.get()); + this.opMetrics.setReadRequestCountMetrics(this.readRequestsCount.get()); try { Store store = getStore(family); // get the closest key. 
(HStore.getRowKeyAtOrBefore can return null) @@ -2305,8 +2323,8 @@ public class HRegion implements HeapSize { // , Writable{ // calling the pre CP hook for batch mutation if (coprocessorHost != null) { - MiniBatchOperationInProgress> miniBatchOp = - new MiniBatchOperationInProgress>(batchOp.operations, + MiniBatchOperationInProgress> miniBatchOp = + new MiniBatchOperationInProgress>(batchOp.operations, batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive); if (coprocessorHost.preBatchMutate(miniBatchOp)) return 0L; } @@ -2386,12 +2404,12 @@ public class HRegion implements HeapSize { // , Writable{ walSyncSuccessful = true; // calling the post CP hook for batch mutation if (coprocessorHost != null) { - MiniBatchOperationInProgress> miniBatchOp = - new MiniBatchOperationInProgress>(batchOp.operations, + MiniBatchOperationInProgress> miniBatchOp = + new MiniBatchOperationInProgress>(batchOp.operations, batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive); coprocessorHost.postBatchMutate(miniBatchOp); } - + // ------------------------------------------------------------------ // STEP 8. Advance mvcc. This will make this put visible to scanners and getters. 
// ------------------------------------------------------------------ @@ -2492,7 +2510,7 @@ public class HRegion implements HeapSize { // , Writable{ throws IOException { return checkAndMutate(row, family, qualifier, compareOp, comparator, w, null, writeToWAL); } - + /** * * @param row @@ -3657,7 +3675,7 @@ public class HRegion implements HeapSize { // , Writable{ public HRegionInfo getRegionInfo() { return regionInfo; } - + RegionScannerImpl(Scan scan, List additionalScanners, HRegion region) throws IOException { // DebugPrint.println("HRegionScanner."); @@ -3899,7 +3917,7 @@ public class HRegion implements HeapSize { // , Writable{ final boolean isEmptyRow = results.isEmpty(); // We have the part of the row necessary for filtering (all of it, usually). - // First filter with the filterRow(List). + // First filter with the filterRow(List). if (filter != null && filter.hasFilterRow()) { filter.filterRow(results); } @@ -3973,7 +3991,7 @@ public class HRegion implements HeapSize { // , Writable{ protected boolean nextRow(byte [] currentRow, int offset, short length) throws IOException { KeyValue next; while((next = this.storeHeap.peek()) != null && next.matchingRow(currentRow, offset, length)) { - this.storeHeap.next(MOCKED_LIST); + this.storeHeap.next(MOCKED_LIST); } results.clear(); resetFilters(); @@ -4318,6 +4336,119 @@ public class HRegion implements HeapSize { // , Writable{ } } + Path getMergesDir() { + return new Path(getRegionDir(), MERGEDIR); + } + + Path getMergesDir(final HRegionInfo hri) { + return new Path(getMergesDir(), hri.getEncodedName()); + } + + /** + * Clean up any merge detritus that may have been left around from previous merge attempts. 
+ */ + void cleanupMergesDir() throws IOException { + FSUtils.deleteDirectory(fs, getMergesDir()); + } + + /** + * Remove merged region + * @param mergedRegion {@link HRegionInfo} + * @throws IOException + */ + void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException { + Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName()); + if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) { + throw new IOException("Failed delete of " + regionDir); + } + } + + /** + * Create the region merges directory. + * @throws IOException If merges dir already exists or we fail to create it. + * @see HRegionFileSystem#cleanupMergesDir() + */ + void createMergesDir() throws IOException { + Path mergesdir = getMergesDir(); + if (fs.exists(mergesdir)) { + LOG.info("The " + mergesdir + + " directory exists. Hence deleting it to recreate it"); + if (!fs.delete(mergesdir, true)) { + throw new IOException("Failed deletion of " + mergesdir + + " before creating them again."); + } + } + if (!fs.mkdirs(mergesdir)) + throw new IOException("Failed create of " + mergesdir); + } + + /** + * Write out a merge reference under the given merges directory. Package local + * so it doesnt leak out of regionserver. + * @param mergedRegion {@link HRegionInfo} of the merged region + * @param familyName Column Family Name + * @param f File to create reference. + * @param mergedDir + * @return Path to created reference. + * @throws IOException + */ + Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName, + final StoreFile f, final Path mergedDir) + throws IOException { + Path referenceDir = new Path(new Path(mergedDir, + mergedRegion.getEncodedName()), familyName); + // A whole reference to the store file. + Reference r = Reference.createTopReference(regionInfo.getStartKey()); + // Add the referred-to regions name as a dot separated suffix. + // See REF_NAME_REGEX regex above. 
The referred-to regions name is + // up in the path of the passed in f -- parentdir is family, + // then the directory above is the region name. + String mergingRegionName = regionInfo.getEncodedName(); + // Write reference with same file id only with the other region name as + // suffix and into the new region location (under same family). + Path p = new Path(referenceDir, f.getPath().getName() + "." + + mergingRegionName); + return r.write(fs, p); + } + + /** + * Commit a merged region, moving it from the merges temporary directory to + * the proper location in the filesystem. + * @param mergedRegionInfo merged region {@link HRegionInfo} + * @throws IOException + */ + void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException { + Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName()); + Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo); + // Move the tmp dir in the expected location + if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) { + if (!fs.rename(mergedRegionTmpDir, regionDir)) { + throw new IOException("Unable to rename " + mergedRegionTmpDir + " to " + + regionDir); + } + } + } + + /** + * Create a merged region given a temp directory with the region data. + * @param mergedRegionInfo + * @param region_b another merging region + * @return merged hregion + * @throws IOException + */ + HRegion createMergedRegionFromMerges(final HRegionInfo mergedRegionInfo, + final HRegion region_b) throws IOException { + HRegion r = HRegion.newHRegion(this.getTableDir(), this.getLog(), + getFilesystem(), this.getConf(), mergedRegionInfo, + this.getTableDesc(), this.rsServices); + r.readRequestsCount.set(this.getReadRequestsCount() + + region_b.getReadRequestsCount()); + r.writeRequestsCount.set(this.getWriteRequestsCount() + + region_b.getWriteRequestsCount()); + this.commitMergedRegion(mergedRegionInfo); + return r; + } + /** * Inserts a new region's meta information into the passed * meta region. 
Used by the HMaster bootstrap code adding @@ -4567,11 +4698,11 @@ public class HRegion implements HeapSize { // , Writable{ long totalReadRequestCount = a.readRequestsCount.get() + b.readRequestsCount.get(); dstRegion.readRequestsCount.set(totalReadRequestCount); dstRegion.opMetrics.setReadRequestCountMetrics(totalReadRequestCount); - + long totalWriteRequestCount = a.writeRequestsCount.get() + b.writeRequestsCount.get(); dstRegion.writeRequestsCount.set(totalWriteRequestCount); dstRegion.opMetrics.setWriteRequestCountMetrics(totalWriteRequestCount); - + dstRegion.initialize(); dstRegion.compactStores(); if (LOG.isDebugEnabled()) { @@ -5645,7 +5776,7 @@ public class HRegion implements HeapSize { // , Writable{ { this.opMetrics.setReadRequestCountMetrics(value); } - + /* * Set the write request count defined in opMetrics * @param value absolute value of write request count @@ -5654,7 +5785,7 @@ public class HRegion implements HeapSize { // , Writable{ { this.opMetrics.setWriteRequestCountMetrics(value); } - + /** @param coprocessorHost the new coprocessor host */ public void setCoprocessorHost(final RegionCoprocessorHost coprocessorHost) { this.coprocessorHost = coprocessorHost; diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index e9b9ef3..9c9d04b 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -3201,6 +3201,29 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, compactSplitThread.requestSplit(region, region.checkSplit()); } + /** + * Merge two regions. Asynchronous operation. 
+ * @param encodedNameOfRegionA encoded name of region a + * @param encodedNameOfRegionB encoded name of region b + * @param forcible true if do a compulsory merge, otherwise we will only merge + * two adjacent regions + * @throws IOException + */ + @Override + @QosPriority(priority = HConstants.HIGH_QOS) + public void mergeRegions(final HRegionInfo regionInfoA, + final HRegionInfo regionInfoB, final boolean forcible) throws IOException { + checkOpen(); + requestCount.incrementAndGet(); + HRegion regionA = getRegion(regionInfoA.getRegionName()); + HRegion regionB = getRegion(regionInfoB.getRegionName()); + LOG.info("Receiving merging request for " + regionA + ", " + regionB + + ",forcible=" + forcible); + regionA.flushcache(); + regionB.flushcache(); + compactSplitThread.requestRegionsMerge(regionA, regionB, forcible); + } + @Override @QosPriority(priority=HConstants.HIGH_QOS) public void compactRegion(HRegionInfo regionInfo, boolean major) diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeRequest.java b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeRequest.java new file mode 100644 index 0000000..35b6942 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeRequest.java @@ -0,0 +1,112 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.util.StringUtils; + +import com.google.common.base.Preconditions; + +/** + * Handles processing region merges. Put in a queue, owned by HRegionServer. + */ +@InterfaceAudience.Private +class RegionMergeRequest implements Runnable { + static final Log LOG = LogFactory.getLog(RegionMergeRequest.class); + private final HRegion region_a; + private final HRegion region_b; + private final HRegionServer server; + private final boolean forcible; + + RegionMergeRequest(HRegion a, HRegion b, HRegionServer hrs, boolean forcible) { + Preconditions.checkNotNull(hrs); + this.region_a = a; + this.region_b = b; + this.server = hrs; + this.forcible = forcible; + } + + @Override + public String toString() { + return "MergeRequest,regions:" + region_a + ", " + region_b + ", forcible=" + + forcible; + } + + @Override + public void run() { + if (this.server.isStopping() || this.server.isStopped()) { + LOG.debug("Skipping merge because server is stopping=" + + this.server.isStopping() + " or stopped=" + this.server.isStopped()); + return; + } + try { + final long startTime = EnvironmentEdgeManager.currentTimeMillis(); + RegionMergeTransaction mt = new RegionMergeTransaction(region_a, + region_b, 
forcible); + // If prepare does not return true, for some reason -- logged inside in + // the prepare call -- we are not ready to merge just now. Just return. + if (!mt.prepare(this.server)) return; + try { + mt.execute(this.server, this.server); + } catch (Exception e) { + if (this.server.isStopping() || this.server.isStopped()) { + LOG.info( + "Skip rollback/cleanup of failed merge of " + region_a + " and " + + region_b + " because server is" + + (this.server.isStopping() ? " stopping" : " stopped"), e); + return; + } + try { + LOG.warn("Running rollback/cleanup of failed merge of " + + region_a +" and "+ region_b + "; " + e.getMessage(), e); + if (mt.rollback(this.server, this.server)) { + LOG.info("Successful rollback of failed merge of " + + region_a +" and "+ region_b); + } else { + this.server.abort("Abort; we got an error after point-of-no-return" + + "when merging " + region_a + " and " + region_b); + } + } catch (RuntimeException ee) { + String msg = "Failed rollback of failed merge of " + + region_a +" and "+ region_b + " -- aborting server"; + // If failed rollback, kill this server to avoid having a hole in + // table. + LOG.info(msg, ee); + this.server.abort(msg); + } + return; + } + LOG.info("Regions merged, META updated, and report to master. region_a=" + + region_a + ", region_b=" + region_b + ",merged region=" + + mt.getMergedRegionInfo().getRegionNameAsString() + + ". 
Region merge took " + + StringUtils.formatTimeDiff(EnvironmentEdgeManager.currentTimeMillis(), startTime)); + } catch (IOException ex) { + LOG.error("Merge failed " + this, + RemoteExceptionHandler.checkIOException(ex)); + server.checkFileSystem(); + } + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java new file mode 100644 index 0000000..79b1965 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionMergeTransaction.java @@ -0,0 +1,791 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.executor.RegionTransitionData; +import org.apache.hadoop.hbase.io.Reference; +import org.apache.hadoop.hbase.regionserver.SplitTransaction.LoggingProgressable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NodeExistsException; + +/** + * Executes region merge as a "transaction". It is similar with + * SplitTransaction. Call {@link #prepare(RegionServerServices)} to setup the + * transaction, {@link #execute(Server, RegionServerServices)} to run the + * transaction and {@link #rollback(Server, RegionServerServices)} to cleanup if + * execute fails. + * + *

+ * Here is an example of how you would use this class: + * + *

+ *  RegionMergeTransaction mt = new RegionMergeTransaction(region_a, region_b, false)
+ *  if (!mt.prepare(services)) return;
+ *  try {
+ *    mt.execute(server, services);
+ *  } catch (IOException ioe) {
+ *    try {
+ *      mt.rollback(server, services);
+ *      return;
+ *    } catch (RuntimeException e) {
+ *      myAbortable.abort("Failed merge, abort");
+ *    }
+ *  }
+ * 
+ *

 * This class is not thread safe. Caller needs ensure merge is run by one thread
 * only.
 */
@InterfaceAudience.Private
public class RegionMergeTransaction {
  private static final Log LOG = LogFactory.getLog(RegionMergeTransaction.class);

  // Merged region info
  private HRegionInfo mergedRegionInfo;
  // region_a sorts before region_b
  private final HRegion region_a;
  private final HRegion region_b;
  // merges dir is under region_a
  private final Path mergesdir;
  // Version of the MERGING znode; -1 until the node has been created/updated.
  private int znodeVersion = -1;
  // We only merge adjacent regions if forcible is false
  private final boolean forcible;

  /**
   * Types to add to the transaction journal. Each enum is a step in the merge
   * transaction. Used to figure how much we need to rollback.
   */
  enum JournalEntry {
    /**
     * Set region as in transition, set it into MERGING state.
     */
    SET_MERGING_IN_ZK,
    /**
     * We created the temporary merge data directory.
     */
    CREATED_MERGE_DIR,
    /**
     * Closed the merging region A.
     */
    CLOSED_REGION_A,
    /**
     * The merging region A has been taken out of the server's online regions list.
     */
    OFFLINED_REGION_A,
    /**
     * Closed the merging region B.
     */
    CLOSED_REGION_B,
    /**
     * The merging region B has been taken out of the server's online regions list.
     */
    OFFLINED_REGION_B,
    /**
     * Started in on creation of the merged region.
     */
    STARTED_MERGED_REGION_CREATION,
    /**
     * Point of no return. If we got here, then transaction is not recoverable
     * other than by crashing out the regionserver.
     */
    PONR
  }

  /*
   * Journal of how far the merge transaction has progressed.
   */
  // NOTE(review): generic type parameters appear stripped by the paste; this is
  // presumably List<JournalEntry>/new ArrayList<JournalEntry>() -- confirm
  // against the original patch.
  private final List journal = new ArrayList();

  // Sentinel used to distinguish "closed by another thread" from real failures
  // in closeAndOfflineRegion (compared by identity, not message).
  private static IOException closedByOtherException = new IOException(
      "Failed to close region: already closed by another thread");

  /**
   * Constructor
   * @param a region a to merge
   * @param b region b to merge
   * @param forcible if false, we will only merge adjacent regions
   */
  public RegionMergeTransaction(final HRegion a, final HRegion b,
      final boolean forcible) {
    // Normalize ordering so region_a always sorts before (or equals) region_b.
    if (a.getRegionInfo().compareTo(b.getRegionInfo()) <= 0) {
      this.region_a = a;
      this.region_b = b;
    } else {
      this.region_a = b;
      this.region_b = a;
    }
    this.forcible = forcible;
    // The merge working directory lives under region_a.
    this.mergesdir = region_a.getMergesDir();
  }

  /**
   * Does checks on merge inputs: same table, distinct regions, adjacency
   * (unless forcible), mergeability of each region, and absence of leftover
   * merge qualifiers in META. On success, computes the merged region info.
   * @param services used to look up META state
   * @return <code>true</code> if the regions are mergeable else
   *         <code>false</code> if they are not (e.g. its already closed, etc.).
   */
  public boolean prepare(final RegionServerServices services) {
    if (!region_a.getTableDesc().getNameAsString()
        .equals(region_b.getTableDesc().getNameAsString())) {
      LOG.info("Can't merge regions " + region_a + "," + region_b
          + " because they do not belong to the same table");
      return false;
    }
    if (region_a.getRegionInfo().equals(region_b.getRegionInfo())) {
      LOG.info("Can't merge the same region " + region_a);
      return false;
    }
    if (!forcible && !HRegionInfo.areAdjacent(region_a.getRegionInfo(),
        region_b.getRegionInfo())) {
      String msg = "Skip merging " + this.region_a.getRegionNameAsString()
          + " and " + this.region_b.getRegionNameAsString()
          + ", because they are not adjacent.";
      LOG.info(msg);
      return false;
    }
    if (!this.region_a.isMergeable() || !this.region_b.isMergeable()) {
      return false;
    }
    try {
      // A pending merge qualifier in META means a previous merge involving one
      // of these regions has not been fully cleaned up yet.
      boolean regionAHasMergeQualifier = hasMergeQualifierInMeta(services,
          region_a.getRegionName());
      if (regionAHasMergeQualifier ||
          hasMergeQualifierInMeta(services, region_b.getRegionName())) {
        LOG.debug("Region " + (regionAHasMergeQualifier ? region_a.getRegionNameAsString()
            : region_b.getRegionNameAsString())
            + " is not mergeable because it has merge qualifier in META");
        return false;
      }
    } catch (IOException e) {
      LOG.warn("Failed judging whether merge transaction is available for "
          + region_a.getRegionNameAsString() + " and "
          + region_b.getRegionNameAsString(), e);
      return false;
    }

    // WARN: make sure there is no parent region of the two merging regions in
    // .META. If exists, fixing up daughters would cause daughter regions(we
    // have merged one) online again when we restart master, so we should clear
    // the parent region to prevent the above case
    // Since HBASE-7721, we don't need fix up daughters any more. so here do
    // nothing

    this.mergedRegionInfo = getMergedRegionInfo(region_a.getRegionInfo(),
        region_b.getRegionInfo());
    return true;
  }

  /**
   * Run the transaction.
   * @param server Hosting server instance. Can be null when testing (won't try
   *          and update in zk if a null server)
   * @param services Used to online/offline regions.
   * @throws IOException If thrown, transaction failed. Call
   *           {@link #rollback(Server, RegionServerServices)}
   * @return merged region
   * @see #rollback(Server, RegionServerServices)
   */
  public HRegion execute(final Server server,
      final RegionServerServices services) throws IOException {
    HRegion mergedRegion = createMergedRegion(server, services);
    openMergedRegion(server, services, mergedRegion);
    transitionZKNode(server, services);
    return mergedRegion;
  }

  /**
   * Prepare the merged region and region files: sets the MERGING znode, closes
   * and offlines both regions, writes merge references, creates the merged
   * region and (past the PONR) atomically updates META.
   * @param server Hosting server instance. Can be null when testing (won't try
   *          and update in zk if a null server)
   * @param services Used to online/offline regions.
   * @return merged region
   * @throws IOException If thrown, transaction failed. Call
   *           {@link #rollback(Server, RegionServerServices)}
   */
  HRegion createMergedRegion(final Server server,
      final RegionServerServices services) throws IOException {
    LOG.info("Starting merge of " + region_a + " and "
        + region_b.getRegionNameAsString() + ", forcible=" + forcible);
    if ((server != null && server.isStopped())
        || (services != null && services.isStopping())) {
      throw new IOException("Server is stopped or stopping");
    }

    // If true, no cluster to write meta edits to or to update znodes in.
    boolean testing = server == null ? true : server.getConfiguration()
        .getBoolean("hbase.testing.nocluster", false);

    // Set ephemeral MERGING znode up in zk. Mocked servers sometimes don't
    // have zookeeper so don't do zk stuff if server or zookeeper is null
    if (server != null && server.getZooKeeper() != null) {
      try {
        createNodeMerging(server.getZooKeeper(), this.mergedRegionInfo,
            server.getServerName());
      } catch (KeeperException e) {
        throw new IOException("Failed creating MERGING znode on "
            + this.mergedRegionInfo.getRegionNameAsString(), e);
      }
    }
    this.journal.add(JournalEntry.SET_MERGING_IN_ZK);
    if (server != null && server.getZooKeeper() != null) {
      try {
        // Transition node from MERGING to MERGING after creating the merge
        // node. Master will get the callback for node change only if the
        // transition is successful.
        // Note that if the transition fails then the rollback will delete the
        // created znode as the journal entry SET_MERGING_IN_ZK is added.
        this.znodeVersion = transitionNodeMerging(server.getZooKeeper(),
            this.mergedRegionInfo, server.getServerName(), -1);
      } catch (KeeperException e) {
        throw new IOException("Failed setting MERGING znode on "
            + this.mergedRegionInfo.getRegionNameAsString(), e);
      }
    }

    this.region_a.createMergesDir();
    this.journal.add(JournalEntry.CREATED_MERGE_DIR);

    // NOTE(review): raw List here too -- presumably List<StoreFile>; confirm.
    List hstoreFilesOfRegionA = closeAndOfflineRegion(
        services, this.region_a, true, testing);
    List hstoreFilesOfRegionB = closeAndOfflineRegion(
        services, this.region_b, false, testing);

    assert hstoreFilesOfRegionA != null && hstoreFilesOfRegionB != null;

    //
    // mergeStoreFiles creates merged region dirs under the region_a merges dir
    // Nothing to unroll here if failure -- clean up of CREATE_MERGE_DIR will
    // clean this up.
    mergeStoreFiles(this.region_a.getFilesystem(), hstoreFilesOfRegionA,
        hstoreFilesOfRegionB);

    // Log to the journal that we are creating merged region. We could fail
    // halfway through. If we do, we could have left
    // stuff in fs that needs cleanup -- a storefile or two. Thats why we
    // add entry to journal BEFORE rather than AFTER the change.
    this.journal.add(JournalEntry.STARTED_MERGED_REGION_CREATION);
    HRegion mergedRegion = createMergedRegionFromMerges(this.region_a,
        this.region_b, this.mergedRegionInfo);

    // This is the point of no return. Similar with SplitTransaction.
    // IF we reach the PONR then subsequent failures need to crash out this
    // regionserver
    this.journal.add(JournalEntry.PONR);

    // Add merged region and delete region_a and region_b
    // as an atomic update. See HBASE-7721. This update to META makes the region
    // will determine whether the region is merged or not in case of failures.
    // If it is successful, master will roll-forward, if not, master will
    // rollback
    if (!testing) {
      MetaEditor.mergeRegions(server.getCatalogTracker(),
          mergedRegion.getRegionInfo(), region_a.getRegionInfo(),
          region_b.getRegionInfo(), server.getServerName());
    }
    return mergedRegion;
  }

  /**
   * Create a merged region from the merges directory under region a. In order
   * to mock it for tests, place it with a new method.
   * @param a region a (hosts the merges dir)
   * @param b region b
   * @param mergedRegion hri of merged region
   * @return merged HRegion.
   * @throws IOException
   */
  HRegion createMergedRegionFromMerges(final HRegion a, final HRegion b,
      final HRegionInfo mergedRegion) throws IOException {
    return a.createMergedRegionFromMerges(mergedRegion, b);
  }

  /**
   * Close the merging region and offline it in regionserver
   * @param services
   * @param region
   * @param isRegionA true if it is merging region a, false if it is region b
   * @param testing true if it is testing
   * @return list of store files of the closed region (NOTE: javadoc previously
   *         claimed "a map of family name to list of store files", which did
   *         not match the return type)
   * @throws IOException
   */
  private List closeAndOfflineRegion(
      final RegionServerServices services, final HRegion region,
      final boolean isRegionA, final boolean testing) throws IOException {
    List hstoreFilesToMerge = null;
    Exception exceptionToThrow = null;
    try {
      hstoreFilesToMerge = region.close(false);
    } catch (Exception e) {
      exceptionToThrow = e;
    }
    if (exceptionToThrow == null && hstoreFilesToMerge == null) {
      // The region was closed by a concurrent thread. We can't continue
      // with the merge, instead we must just abandon the merge. If we
      // reopen or merge this could cause problems because the region has
      // probably already been moved to a different server, or is in the
      // process of moving to a different server.
      exceptionToThrow = closedByOtherException;
    }
    if (exceptionToThrow != closedByOtherException) {
      // We did close the region ourselves (or tried to); journal it so
      // rollback knows to re-open it. Identity compare against the sentinel.
      this.journal.add(isRegionA ? JournalEntry.CLOSED_REGION_A
          : JournalEntry.CLOSED_REGION_B);
    }
    if (exceptionToThrow != null) {
      if (exceptionToThrow instanceof IOException)
        throw (IOException) exceptionToThrow;
      throw new IOException(exceptionToThrow);
    }

    if (!testing) {
      services.removeFromOnlineRegions(region.getRegionNameAsString());
    }
    this.journal.add(isRegionA ? JournalEntry.OFFLINED_REGION_A
        : JournalEntry.OFFLINED_REGION_B);
    return hstoreFilesToMerge;
  }

  /**
   * Get merged region info through the specified two regions
   * @param a merging region A
   * @param b merging region B
   * @return the merged region info
   */
  public static HRegionInfo getMergedRegionInfo(final HRegionInfo a,
      final HRegionInfo b) {
    long rid = EnvironmentEdgeManager.currentTimeMillis();
    // Regionid is timestamp. Merged region's id can't be less than that of
    // merging regions else will insert at wrong location in .META.
    if (rid < a.getRegionId() || rid < b.getRegionId()) {
      LOG.warn("Clock skew; merging regions id are " + a.getRegionId()
          + " and " + b.getRegionId() + ", but current time here is " + rid);
      rid = Math.max(a.getRegionId(), b.getRegionId()) + 1;
    }

    byte[] startKey = null;
    byte[] endKey = null;
    // The merged range spans from the lower region's start to the higher's end.
    if (a.compareTo(b) <= 0) {
      startKey = a.getStartKey();
      endKey = b.getEndKey();
    } else {
      startKey = b.getStartKey();
      endKey = a.getEndKey();
    }

    // Merged region is sorted between two merging regions in META
    HRegionInfo mergedRegionInfo = new HRegionInfo(a.getTableName(), startKey,
        endKey, false, rid);
    return mergedRegionInfo;
  }

  /**
   * Perform time consuming opening of the merged region.
   * @param server Hosting server instance. Can be null when testing (won't try
   *          and update in zk if a null server)
   * @param services Used to online/offline regions.
   * @param merged the merged region
   * @throws IOException If thrown, transaction failed. Call
   *           {@link #rollback(Server, RegionServerServices)}
   */
  void openMergedRegion(final Server server,
      final RegionServerServices services, HRegion merged) throws IOException {
    boolean stopped = server != null && server.isStopped();
    boolean stopping = services != null && services.isStopping();
    if (stopped || stopping) {
      LOG.info("Not opening merged region " + merged.getRegionNameAsString()
          + " because stopping=" + stopping + ", stopped=" + stopped);
      return;
    }
    HRegionInfo hri = merged.getRegionInfo();
    // Periodically logs open progress so a slow open is visible in the logs.
    LoggingProgressable reporter = server == null ? null
        : new LoggingProgressable(hri, server.getConfiguration().getLong(
            "hbase.regionserver.regionmerge.open.log.interval", 10000));
    merged.openHRegion(reporter);

    if (services != null) {
      try {
        services.postOpenDeployTasks(merged, server.getCatalogTracker());
        services.addToOnlineRegions(merged);
      } catch (KeeperException ke) {
        throw new IOException(ke);
      }
    }
  }

  /**
   * Finish off merge transaction, transition the zknode
   * @param server Hosting server instance. Can be null when testing (won't try
   *          and update in zk if a null server)
   * @param services Used to online/offline regions.
   * @throws IOException If thrown, transaction failed. Call
   *           {@link #rollback(Server, RegionServerServices)}
   */
  void transitionZKNode(final Server server, final RegionServerServices services)
      throws IOException {
    if (server == null || server.getZooKeeper() == null) {
      return;
    }

    // Tell master about merge by updating zk. If we fail, abort.
    try {
      this.znodeVersion = transitionNodeMerge(server.getZooKeeper(),
          this.mergedRegionInfo, region_a.getRegionInfo(),
          region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);

      long startTime = EnvironmentEdgeManager.currentTimeMillis();
      int spins = 0;
      // Now wait for the master to process the merge. We know it's done
      // when the znode is deleted. The reason we keep tickling the znode is
      // that it's possible for the master to miss an event.
      do {
        if (spins % 10 == 0) {
          LOG.debug("Still waiting on the master to process the merge for "
              + this.mergedRegionInfo.getEncodedName() + ", waited "
              + (EnvironmentEdgeManager.currentTimeMillis() - startTime) + "ms");
        }
        Thread.sleep(100);
        // When this returns -1 it means the znode doesn't exist
        this.znodeVersion = tickleNodeMerge(server.getZooKeeper(),
            this.mergedRegionInfo, region_a.getRegionInfo(),
            region_b.getRegionInfo(), server.getServerName(), this.znodeVersion);
        spins++;
      } while (this.znodeVersion != -1 && !server.isStopped()
          && !services.isStopping());
    } catch (Exception e) {
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      throw new IOException("Failed telling master about merge "
          + mergedRegionInfo.getEncodedName(), e);
    }

    // Leaving here, the mergedir with its dross will be in place but since the
    // merge was successful, just leave it; it'll be cleaned when region_a is
    // cleaned up by CatalogJanitor on master
  }

  /**
   * Create reference file(s) of merging regions under the region_a merges dir
   * @param hstoreFilesOfRegionA
   * @param hstoreFilesOfRegionB
   * @throws IOException
   */
  private void mergeStoreFiles(FileSystem fs,
      List hstoreFilesOfRegionA,
      List hstoreFilesOfRegionB)
      throws IOException {

    // Create reference file(s) of region A in mergdir
    for (StoreFile sf : hstoreFilesOfRegionA) {
      mergeStoreFile(fs, mergedRegionInfo, sf, this.mergesdir,
          this.region_a.getRegionInfo());
    }

    // Create reference file(s) of region B in mergedir
    for (StoreFile sf : hstoreFilesOfRegionB) {
      mergeStoreFile(fs, mergedRegionInfo, sf, this.mergesdir,
          this.region_b.getRegionInfo());
    }
  }

  /**
   * Write out a merge reference under the given merges directory. Package local
   * so it doesn't leak out of regionserver.
   * @param mergedRegion {@link HRegionInfo} of the merged region
   * @param f File to create reference.
   * @param mergedDir
   * @return Path to created reference.
   * @throws IOException
   */
  Path mergeStoreFile(FileSystem fs, final HRegionInfo mergedRegion,
      final StoreFile f, final Path mergedDir, HRegionInfo regionInfo)
      throws IOException {
    final String familyName = Bytes.toString(f.getFamily());
    Path referenceDir = new Path(new Path(mergedDir,
        mergedRegion.getEncodedName()), familyName);
    // A whole reference to the store file.
    Reference r = Reference.createTopReference(regionInfo.getStartKey());
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_REGEX regex above. The referred-to regions name is
    // up in the path of the passed in f -- parentdir is family,
    // then the directory above is the region name.
    String mergingRegionName = regionInfo.getEncodedName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(referenceDir, f.getPath().getName() + "."
        + mergingRegionName);
    return r.write(fs, p);
  }

  /**
   * @param server Hosting server instance (May be null when testing).
   * @param services Services of regionserver, used to online regions.
   * @throws IOException If thrown, rollback failed. Take drastic action.
   * @return True if we successfully rolled back, false if we got to the point
   *         of no return and so now need to abort the server to minimize
   *         damage.
   */
  public boolean rollback(final Server server,
      final RegionServerServices services) throws IOException {
    assert this.mergedRegionInfo != null;
    boolean result = true;
    ListIterator iterator = this.journal
        .listIterator(this.journal.size());
    // Iterate in reverse.
+ while (iterator.hasPrevious()) { + JournalEntry je = iterator.previous(); + switch (je) { + + case SET_MERGING_IN_ZK: + if (server != null && server.getZooKeeper() != null) { + cleanZK(server, this.mergedRegionInfo); + } + break; + + case CREATED_MERGE_DIR: + this.region_a.writestate.writesEnabled = true; + this.region_b.writestate.writesEnabled = true; + this.region_a.cleanupMergesDir(); + break; + + case CLOSED_REGION_A: + try { + // So, this returns a seqid but if we just closed and then reopened, + // we should be ok. On close, we flushed using sequenceid obtained + // from hosting regionserver so no need to propagate the sequenceid + // returned out of initialize below up into regionserver as we + // normally do. + this.region_a.initialize(); + } catch (IOException e) { + LOG.error("Failed rollbacking CLOSED_REGION_A of region " + + this.region_a.getRegionNameAsString(), e); + throw new RuntimeException(e); + } + break; + + case OFFLINED_REGION_A: + if (services != null) + services.addToOnlineRegions(this.region_a); + break; + + case CLOSED_REGION_B: + try { + this.region_b.initialize(); + } catch (IOException e) { + LOG.error("Failed rollbacking CLOSED_REGION_A of region " + + this.region_b.getRegionNameAsString(), e); + throw new RuntimeException(e); + } + break; + + case OFFLINED_REGION_B: + if (services != null) + services.addToOnlineRegions(this.region_b); + break; + + case STARTED_MERGED_REGION_CREATION: + this.region_a.cleanupMergedRegion( + this.mergedRegionInfo); + break; + + case PONR: + // We got to the point-of-no-return so we need to just abort. Return + // immediately. Do not clean up created merged regions. + return false; + + default: + throw new RuntimeException("Unhandled journal entry: " + je); + } + } + return result; + } + + HRegionInfo getMergedRegionInfo() { + return this.mergedRegionInfo; + } + + // For unit testing. 
+ Path getMergesDir() { + return this.mergesdir; + } + + private static void cleanZK(final Server server, final HRegionInfo hri) { + try { + // Only delete if its in expected state; could have been hijacked. + ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(), + EventType.RS_ZK_REGION_MERGING); + } catch (KeeperException.NoNodeException e) { + LOG.warn("Failed cleanup zk node of " + hri.getRegionNameAsString(), e); + } catch (KeeperException e) { + server.abort("Failed cleanup zk node of " + hri.getRegionNameAsString(),e); + } + + } + + /** + * Creates a new ephemeral node in the MERGING state for the merged region. + * Create it ephemeral in case regionserver dies mid-merge. + * + *

+ * Does not transition nodes from other states. If a node already exists for + * this region, a {@link NodeExistsException} will be thrown. + * + * @param zkw zk reference + * @param region region to be created as offline + * @param serverName server event originates from + * @return Version of znode created. + * @throws KeeperException + * @throws IOException + */ + int createNodeMerging(final ZooKeeperWatcher zkw, final HRegionInfo region, + final ServerName serverName) throws KeeperException, IOException { + LOG.debug(zkw.prefix("Creating ephemeral node for " + + region.getEncodedName() + " in MERGING state")); + RegionTransitionData rt = new RegionTransitionData( + EventType.RS_ZK_REGION_MERGING, region.getRegionName(), serverName); + String node = ZKAssign.getNodeName(zkw, region.getEncodedName()); + if (!ZKUtil.createEphemeralNodeAndWatch(zkw, node, rt.getBytes())) { + throw new IOException("Failed create of ephemeral " + node); + } + // Transition node from MERGING to MERGING and pick up version so we + // can be sure this znode is ours; version is needed deleting. + return transitionNodeMerging(zkw, region, serverName, -1); + } + + /** + * Transitions an existing node for the specified region which is currently in + * the MERGING state to be in the MERGE state. Converts the ephemeral MERGING + * znode to an ephemeral MERGE node. Master cleans up MERGE znode when it + * reads it (or if we crash, zk will clean it up). + * + *

+ * Does not transition nodes from other states. If for some reason the node + * could not be transitioned, the method returns -1. If the transition is + * successful, the version of the node after transition is returned. + * + *

+ * This method can fail and return false for three different reasons: + *

+ * + *

+ * Does not set any watches. + * + *

+ * This method should only be used by a RegionServer when completing the open + * of merged region. + * + * @param zkw zk reference + * @param merged region to be transitioned to opened + * @param a merging region A + * @param b merging region B + * @param serverName server event originates from + * @param znodeVersion expected version of data before modification + * @return version of node after transition, -1 if unsuccessful transition + * @throws KeeperException if unexpected zookeeper exception + * @throws IOException + */ + private static int transitionNodeMerge(ZooKeeperWatcher zkw, + HRegionInfo merged, HRegionInfo a, HRegionInfo b, ServerName serverName, + final int znodeVersion) throws KeeperException, IOException { + byte[] payload = Writables.getBytes(merged, a, b); + return ZKAssign.transitionNode(zkw, merged, serverName, + EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGE, + znodeVersion, payload); + } + + /** + * + * @param zkw zk reference + * @param parent region to be transitioned to merging + * @param serverName server event originates from + * @param version znode version + * @return version of node after transition, -1 if unsuccessful transition + * @throws KeeperException + * @throws IOException + */ + int transitionNodeMerging(final ZooKeeperWatcher zkw, + final HRegionInfo parent, final ServerName serverName, final int version) + throws KeeperException, IOException { + return ZKAssign.transitionNode(zkw, parent, serverName, + EventType.RS_ZK_REGION_MERGING, EventType.RS_ZK_REGION_MERGING, + version); + } + + private static int tickleNodeMerge(ZooKeeperWatcher zkw, HRegionInfo merged, + HRegionInfo a, HRegionInfo b, ServerName serverName, + final int znodeVersion) throws KeeperException, IOException { + byte[] payload = Writables.getBytes(merged, a, b); + return ZKAssign.transitionNode(zkw, merged, serverName, + EventType.RS_ZK_REGION_MERGE, EventType.RS_ZK_REGION_MERGE, + znodeVersion, payload); + } + + /** + * Checks if the given 
region has merge qualifier in .META. + * @param services + * @param regionName name of specified region + * @return true if the given region has merge qualifier in META.(It will be + * cleaned by CatalogJanitor) + * @throws IOException + */ + boolean hasMergeQualifierInMeta(final RegionServerServices services, + final byte[] regionName) throws IOException { + // Get merge regions if it is a merged region and already has merge + // qualifier + Pair mergeRegions = MetaReader + .getRegionsFromMergeQualifier(services.getCatalogTracker(), regionName); + if (mergeRegions != null && + (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) { + // It has merge qualifier + return true; + } + return false; + } +} diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java index 9408500..08b5412 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java @@ -96,4 +96,9 @@ public interface RegionServerServices extends OnlineRegions { * @return The RegionServer's "Leases" service */ public Leases getLeases(); + + /** + * @return The RegionServer's CatalogTracker + */ + public CatalogTracker getCatalogTracker(); } diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java index 51fd7c8..8b6c9ae 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java @@ -512,8 +512,9 @@ public class SplitTransaction { void openDaughterRegion(final Server server, final HRegion daughter) throws IOException, KeeperException { HRegionInfo hri = daughter.getRegionInfo(); - LoggingProgressable reporter = server == null? 
null: - new LoggingProgressable(hri, server.getConfiguration()); + LoggingProgressable reporter = server == null ? null + : new LoggingProgressable(hri, server.getConfiguration().getLong( + "hbase.regionserver.split.daughter.open.log.interval", 10000)); daughter.openHRegion(reporter); } @@ -522,10 +523,9 @@ public class SplitTransaction { private long lastLog = -1; private final long interval; - LoggingProgressable(final HRegionInfo hri, final Configuration c) { + LoggingProgressable(final HRegionInfo hri, final long interval) { this.hri = hri; - this.interval = c.getLong("hbase.regionserver.split.daughter.open.log.interval", - 10000); + this.interval = interval; } @Override diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index f357583..f918580 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -452,8 +452,8 @@ public class Store extends SchemaConfigured implements HeapSize { if (ioe == null) ioe = new InterruptedIOException(e.getMessage()); } catch (ExecutionException e) { if (ioe == null) ioe = new IOException(e.getCause()); - } - } + } + } } finally { storeFileOpenerThreadPool.shutdownNow(); } diff --git a/src/main/java/org/apache/hadoop/hbase/util/Triple.java b/src/main/java/org/apache/hadoop/hbase/util/Triple.java new file mode 100644 index 0000000..bb75553 --- /dev/null +++ b/src/main/java/org/apache/hadoop/hbase/util/Triple.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +/** + * Utility class to manage a triple. + */ +public class Triple { + private A first; + private B second; + private C third; + + public Triple(A first, B second, C third) { + this.first = first; + this.second = second; + this.third = third; + } + + public int hashCode() { + int hashFirst = (first != null ? first.hashCode() : 0); + int hashSecond = (second != null ? second.hashCode() : 0); + int hashThird = (third != null ? third.hashCode() : 0); + + return (hashFirst >> 1) ^ hashSecond ^ (hashThird << 1); + } + + public boolean equals(Object obj) { + if (!(obj instanceof Triple)) { + return false; + } + + Triple otherTriple = (Triple) obj; + + if (first != otherTriple.first && (first != null && !(first.equals(otherTriple.first)))) + return false; + if (second != otherTriple.second && (second != null && !(second.equals(otherTriple.second)))) + return false; + if (third != otherTriple.third && (third != null && !(third.equals(otherTriple.third)))) + return false; + + return true; + } + + public String toString() { + return "(" + first + ", " + second + "," + third + " )"; + } + + public A getFirst() { + return first; + } + + public void setFirst(A first) { + this.first = first; + } + + public B getSecond() { + return second; + } + + public void setSecond(B second) { + this.second = second; + } + + public C getThird() { + return third; + } + + public void setThird(C third) { + this.third = third; + } +} + + + diff --git a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java 
b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 7e4bffa..186aa28 100644 --- a/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -958,6 +958,23 @@ public class HBaseTestingUtility { } /** + * Create a table. + * @param tableName + * @param family + * @param splitRows + * @return An HTable instance for the created table. + * @throws IOException + */ + public HTable createTable(byte[] tableName, byte[] family, byte[][] splitRows) + throws IOException { + HTableDescriptor desc = new HTableDescriptor(tableName); + HColumnDescriptor hcd = new HColumnDescriptor(family); + desc.addFamily(hcd); + getHBaseAdmin().createTable(desc, splitRows); + return new HTable(getConfiguration(), tableName); + } + + /** * Drop an existing table * @param tableName existing table */ diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java index 0d9aaf3..d4f17d0 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java @@ -59,13 +59,13 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; -import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator; import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.HFileArchiveUtil; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; import org.apache.hadoop.hbase.util.Writables; import 
org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.junit.Test; @@ -242,24 +242,24 @@ public class TestCatalogJanitor { public HTableDescriptor remove(String tablename) throws IOException { return null; } - + @Override public Map getAll() throws IOException { return null; } - + @Override public HTableDescriptor get(byte[] tablename) throws IOException { return get(Bytes.toString(tablename)); } - + @Override public HTableDescriptor get(String tablename) throws IOException { return createHTableDescriptor(); } - + @Override public void add(HTableDescriptor htd) throws IOException { } @@ -313,6 +313,11 @@ public class TestCatalogJanitor { public boolean shouldSplitMetaSeparately() { return false; } + + @Override + public void dispatchMergingRegions(HRegionInfo region_a, HRegionInfo region_b, + boolean forcible) throws IOException { + } } @Test @@ -566,9 +571,11 @@ public class TestCatalogJanitor { splita.setOffline(true);//simulate that splita goes offline when it is split splitParents.put(splita, makeResultFromHRegionInfo(splita, splitaa, splitab)); + final Map mergedRegions = new TreeMap(); CatalogJanitor janitor = spy(new CatalogJanitor(server, services)); - doReturn(new Pair>( - 10, splitParents)).when(janitor).getSplitParents(); + doReturn(new Triple, Map>( + 10, mergedRegions, splitParents)).when(janitor) + .getMergedRegionsAndSplitParents(); //create ref from splita to parent Path splitaRef = diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransaction.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransaction.java new file mode 100644 index 0000000..383da0e --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransaction.java @@ -0,0 +1,426 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.zookeeper.KeeperException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +import com.google.common.collect.ImmutableList; + +/** + * Test the {@link RegionMergeTransaction} class 
against two HRegions (as + * opposed to running cluster). + */ +@Category(SmallTests.class) +public class TestRegionMergeTransaction { + private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private final Path testdir = TEST_UTIL.getDataTestDir(this.getClass() + .getName()); + private HRegion region_a; + private HRegion region_b; + private HRegion region_c; + private HLog wal; + private FileSystem fs; + // Start rows of region_a,region_b,region_c + private static final byte[] STARTROW_A = new byte[] { 'a', 'a', 'a' }; + private static final byte[] STARTROW_B = new byte[] { 'g', 'g', 'g' }; + private static final byte[] STARTROW_C = new byte[] { 'w', 'w', 'w' }; + private static final byte[] ENDROW = new byte[] { '{', '{', '{' }; + private static final byte[] CF = HConstants.CATALOG_FAMILY; + + @Before + public void setup() throws IOException { + this.fs = FileSystem.get(TEST_UTIL.getConfiguration()); + this.fs.delete(this.testdir, true); + this.wal = new HLog(fs, new Path(this.testdir, "logs"), new Path(this.testdir, ".oldlogs"), + TEST_UTIL.getConfiguration()); + this.region_a = createRegion(this.testdir, this.wal, STARTROW_A, STARTROW_B); + this.region_b = createRegion(this.testdir, this.wal, STARTROW_B, STARTROW_C); + this.region_c = createRegion(this.testdir, this.wal, STARTROW_C, ENDROW); + assert region_a != null && region_b != null && region_c != null; + TEST_UTIL.getConfiguration().setBoolean("hbase.testing.nocluster", true); + } + + @After + public void teardown() throws IOException { + for (HRegion region : new HRegion[] { region_a, region_b, region_c }) { + if (region != null && !region.isClosed()) region.close(); + if (this.fs.exists(region.getRegionDir()) + && !this.fs.delete(region.getRegionDir(), true)) { + throw new IOException("Failed deleting of " + + region.getRegionDir()); + } + } + if (this.wal != null) + this.wal.closeAndDelete(); + this.fs.delete(this.testdir, true); + } + + /** + * Test straight prepare works. 
Tries to merge on {@link #region_a} and + * {@link #region_b} + * @throws IOException + */ + @Test + public void testPrepare() throws IOException { + prepareOnGoodRegions(); + } + + private RegionMergeTransaction prepareOnGoodRegions() throws IOException { + RegionMergeTransaction mt = new RegionMergeTransaction(region_a, region_b, + false); + RegionMergeTransaction spyMT = Mockito.spy(mt); + doReturn(false).when(spyMT).hasMergeQualifierInMeta(null, + region_a.getRegionName()); + doReturn(false).when(spyMT).hasMergeQualifierInMeta(null, + region_b.getRegionName()); + assertTrue(spyMT.prepare(null)); + return spyMT; + } + + /** + * Test merging the same region + */ + @Test + public void testPrepareWithSameRegion() throws IOException { + RegionMergeTransaction mt = new RegionMergeTransaction(this.region_a, + this.region_a, true); + assertFalse("should not merge the same region even if it is forcible ", + mt.prepare(null)); + } + + /** + * Test merging two not adjacent regions under a common merge + */ + @Test + public void testPrepareWithRegionsNotAdjacent() throws IOException { + RegionMergeTransaction mt = new RegionMergeTransaction(this.region_a, + this.region_c, false); + assertFalse("should not merge two regions if they are adjacent except it is forcible", + mt.prepare(null)); + } + + /** + * Test merging two not adjacent regions under a compulsory merge + */ + @Test + public void testPrepareWithRegionsNotAdjacentUnderCompulsory() + throws IOException { + RegionMergeTransaction mt = new RegionMergeTransaction(region_a, region_c, + true); + RegionMergeTransaction spyMT = Mockito.spy(mt); + doReturn(false).when(spyMT).hasMergeQualifierInMeta(null, + region_a.getRegionName()); + doReturn(false).when(spyMT).hasMergeQualifierInMeta(null, + region_c.getRegionName()); + assertTrue("Since focible is true, should merge two regions even if they are not adjacent", + spyMT.prepare(null)); + } + + /** + * Pass a reference store + */ + @Test + public void 
testPrepareWithRegionsWithReference() throws IOException { + Store storeMock = Mockito.mock(Store.class); + when(storeMock.hasReferences()).thenReturn(true); + when(storeMock.getFamily()).thenReturn(new HColumnDescriptor("cf")); + when(storeMock.close()).thenReturn(ImmutableList.of()); + this.region_a.stores.put(Bytes.toBytes(""), storeMock); + RegionMergeTransaction mt = new RegionMergeTransaction(this.region_a, + this.region_b, false); + assertFalse( + "a region should not be mergeable if it has instances of store file references", + mt.prepare(null)); + } + + @Test + public void testPrepareWithClosedRegion() throws IOException { + this.region_a.close(); + RegionMergeTransaction mt = new RegionMergeTransaction(this.region_a, + this.region_b, false); + assertFalse(mt.prepare(null)); + } + + /** + * Test merging regions which are merged regions and has reference in META all + * the same + */ + @Test + public void testPrepareWithRegionsWithMergeReference() throws IOException { + RegionMergeTransaction mt = new RegionMergeTransaction(region_a, region_b, + false); + RegionMergeTransaction spyMT = Mockito.spy(mt); + doReturn(true).when(spyMT).hasMergeQualifierInMeta(null, + region_a.getRegionName()); + doReturn(true).when(spyMT).hasMergeQualifierInMeta(null, + region_b.getRegionName()); + assertFalse(spyMT.prepare(null)); + } + + @Test + public void testWholesomeMerge() throws IOException { + final int rowCountOfRegionA = loadRegion(this.region_a, CF, true); + final int rowCountOfRegionB = loadRegion(this.region_b, CF, true); + assertTrue(rowCountOfRegionA > 0 && rowCountOfRegionB > 0); + assertEquals(rowCountOfRegionA, countRows(this.region_a)); + assertEquals(rowCountOfRegionB, countRows(this.region_b)); + + // Start transaction. + RegionMergeTransaction mt = prepareOnGoodRegions(); + + // Run the execute. Look at what it returns. 
+ Server mockServer = Mockito.mock(Server.class); + when(mockServer.getConfiguration()) + .thenReturn(TEST_UTIL.getConfiguration()); + HRegion mergedRegion = mt.execute(mockServer, null); + // Do some assertions about execution. + assertTrue(this.fs.exists(mt.getMergesDir())); + // Assert region_a and region_b is closed. + assertTrue(region_a.isClosed()); + assertTrue(region_b.isClosed()); + + // Assert mergedir is empty -- because its content will have been moved out + // to be under the merged region dirs. + assertEquals(0, this.fs.listStatus(mt.getMergesDir()).length); + // Check merged region have correct key span. + assertTrue(Bytes.equals(this.region_a.getStartKey(), + mergedRegion.getStartKey())); + assertTrue(Bytes.equals(this.region_b.getEndKey(), + mergedRegion.getEndKey())); + // Count rows. merged region are already open + try { + int mergedRegionRowCount = countRows(mergedRegion); + assertEquals((rowCountOfRegionA + rowCountOfRegionB), + mergedRegionRowCount); + } finally { + HRegion.closeHRegion(mergedRegion); + } + // Assert the write lock is no longer held on region_a and region_b + assertTrue(!this.region_a.lock.writeLock().isHeldByCurrentThread()); + assertTrue(!this.region_b.lock.writeLock().isHeldByCurrentThread()); + } + + @Test + public void testRollback() throws IOException { + final int rowCountOfRegionA = loadRegion(this.region_a, CF, true); + final int rowCountOfRegionB = loadRegion(this.region_b, CF, true); + assertTrue(rowCountOfRegionA > 0 && rowCountOfRegionB > 0); + assertEquals(rowCountOfRegionA, countRows(this.region_a)); + assertEquals(rowCountOfRegionB, countRows(this.region_b)); + + // Start transaction. + RegionMergeTransaction mt = prepareOnGoodRegions(); + + when(mt.createMergedRegionFromMerges(region_a, region_b, + mt.getMergedRegionInfo())).thenThrow( + new MockedFailedMergedRegionCreation()); + + // Run the execute. Look at what it returns. 
+ boolean expectedException = false; + Server mockServer = Mockito.mock(Server.class); + when(mockServer.getConfiguration()) + .thenReturn(TEST_UTIL.getConfiguration()); + try { + mt.execute(mockServer, null); + } catch (MockedFailedMergedRegionCreation e) { + expectedException = true; + } + assertTrue(expectedException); + // Run rollback + assertTrue(mt.rollback(null, null)); + + // Assert I can scan region_a and region_b. + int rowCountOfRegionA2 = countRows(this.region_a); + assertEquals(rowCountOfRegionA, rowCountOfRegionA2); + int rowCountOfRegionB2 = countRows(this.region_b); + assertEquals(rowCountOfRegionB, rowCountOfRegionB2); + + // Assert rollback cleaned up stuff in fs + assertTrue(!this.fs.exists(HRegion.getRegionDir(this.testdir, + mt.getMergedRegionInfo()))); + + assertTrue(!this.region_a.lock.writeLock().isHeldByCurrentThread()); + assertTrue(!this.region_b.lock.writeLock().isHeldByCurrentThread()); + + // Now retry the merge but do not throw an exception this time. + assertTrue(mt.prepare(null)); + HRegion mergedRegion = mt.execute(mockServer, null); + // Count rows. daughters are already open + // Count rows. 
merged region are already open + try { + int mergedRegionRowCount = countRows(mergedRegion); + assertEquals((rowCountOfRegionA + rowCountOfRegionB), + mergedRegionRowCount); + } finally { + HRegion.closeHRegion(mergedRegion); + } + // Assert the write lock is no longer held on region_a and region_b + assertTrue(!this.region_a.lock.writeLock().isHeldByCurrentThread()); + assertTrue(!this.region_b.lock.writeLock().isHeldByCurrentThread()); + } + + @Test + public void testFailAfterPONR() throws IOException, KeeperException { + final int rowCountOfRegionA = loadRegion(this.region_a, CF, true); + final int rowCountOfRegionB = loadRegion(this.region_b, CF, true); + assertTrue(rowCountOfRegionA > 0 && rowCountOfRegionB > 0); + assertEquals(rowCountOfRegionA, countRows(this.region_a)); + assertEquals(rowCountOfRegionB, countRows(this.region_b)); + + // Start transaction. + RegionMergeTransaction mt = prepareOnGoodRegions(); + Mockito.doThrow(new MockedFailedMergedRegionOpen()) + .when(mt) + .openMergedRegion((Server) Mockito.anyObject(), + (RegionServerServices) Mockito.anyObject(), + (HRegion) Mockito.anyObject()); + + // Run the execute. Look at what it returns. + boolean expectedException = false; + Server mockServer = Mockito.mock(Server.class); + when(mockServer.getConfiguration()) + .thenReturn(TEST_UTIL.getConfiguration()); + try { + mt.execute(mockServer, null); + } catch (MockedFailedMergedRegionOpen e) { + expectedException = true; + } + assertTrue(expectedException); + // Run rollback returns false that we should restart. + assertFalse(mt.rollback(null, null)); + // Make sure that merged region is still in the filesystem, that + // they have not been removed; this is supposed to be the case if we go + // past point of no return. 
+ Path tableDir = this.region_a.getRegionDir() + .getParent(); + Path mergedRegionDir = new Path(tableDir, mt.getMergedRegionInfo() + .getEncodedName()); + assertTrue(TEST_UTIL.getTestFileSystem().exists(mergedRegionDir)); + } + + /** + * Exception used in this class only. + */ + @SuppressWarnings("serial") + private class MockedFailedMergedRegionCreation extends IOException { + } + + @SuppressWarnings("serial") + private class MockedFailedMergedRegionOpen extends IOException { + } + + private HRegion createRegion(final Path testdir, final HLog wal, + final byte[] startrow, final byte[] endrow) + throws IOException { + // Make a region with start and end keys. + HTableDescriptor htd = new HTableDescriptor("table"); + HColumnDescriptor hcd = new HColumnDescriptor(CF); + htd.addFamily(hcd); + HRegionInfo hri = new HRegionInfo(htd.getName(), startrow, endrow); + HRegion a = HRegion.createHRegion(hri, testdir, + TEST_UTIL.getConfiguration(), htd); + HRegion.closeHRegion(a); + return HRegion.openHRegion(testdir, hri, htd, wal, + TEST_UTIL.getConfiguration()); + } + + private int countRows(final HRegion r) throws IOException { + int rowcount = 0; + InternalScanner scanner = r.getScanner(new Scan()); + try { + List kvs = new ArrayList(); + boolean hasNext = true; + while (hasNext) { + hasNext = scanner.next(kvs); + if (!kvs.isEmpty()) + rowcount++; + } + } finally { + scanner.close(); + } + return rowcount; + } + + /** + * Load region with rows from 'aaa' to 'zzz', skip the rows which are out of + * range of the region + * @param r Region + * @param f Family + * @param flush flush the cache if true + * @return Count of rows loaded. 
+ * @throws IOException + */ + private int loadRegion(final HRegion r, final byte[] f, final boolean flush) + throws IOException { + byte[] k = new byte[3]; + int rowCount = 0; + for (byte b1 = 'a'; b1 <= 'z'; b1++) { + for (byte b2 = 'a'; b2 <= 'z'; b2++) { + for (byte b3 = 'a'; b3 <= 'z'; b3++) { + k[0] = b1; + k[1] = b2; + k[2] = b3; + if (!HRegion.rowIsInRange(r.getRegionInfo(), k)) { + continue; + } + Put put = new Put(k); + put.add(f, null, k); + if (r.getLog() == null) + put.setWriteToWAL(false); + r.put(put); + rowCount++; + } + } + if (flush) { + r.flushcache(); + } + } + return rowCount; + } + +} diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java new file mode 100644 index 0000000..54505ae --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java @@ -0,0 +1,327 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.master.CatalogJanitor; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.google.common.base.Joiner; + +/** + * Like {@link TestRegionMergeTransaction} in that we're testing + * {@link RegionMergeTransaction} only the below tests are against a running + * cluster where {@link TestRegionMergeTransaction} tests against a bare
+ */ +@Category(LargeTests.class) +public class TestRegionMergeTransactionOnCluster { + private static final Log LOG = LogFactory + .getLog(TestRegionMergeTransactionOnCluster.class); + private static final int NB_SERVERS = 3; + + private static final byte[] FAMILYNAME = Bytes.toBytes("fam"); + private static final byte[] QUALIFIER = Bytes.toBytes("q"); + + private static byte[] ROW = Bytes.toBytes("testRow"); + private static final int INITIAL_REGION_NUM = 10; + private static final int ROWSIZE = 200; + private static byte[][] ROWS = makeN(ROW, ROWSIZE); + + private static int waitTime = 60 * 1000; + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private static HMaster master; + private static HBaseAdmin admin; + + @BeforeClass + public static void beforeAllTests() throws Exception { + // Start a cluster + TEST_UTIL.startMiniCluster(NB_SERVERS); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + master = cluster.getMaster(); + master.balanceSwitch(false); + admin = TEST_UTIL.getHBaseAdmin(); + } + + @AfterClass + public static void afterAllTests() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testWholesomeMerge() throws Exception { + LOG.info("Starting testWholesomeMerge"); + final byte[] tableName = Bytes.toBytes("testWholesomeMerge"); + + // Create table and load data. + HTable table = createTableAndLoadData(master, tableName); + // Merge 1st and 2nd region + mergeRegionsAndVerifyRegionNum(master, tableName, 0, 1, + INITIAL_REGION_NUM - 1); + + // Merge 2nd and 3th region + mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2, + INITIAL_REGION_NUM - 2); + + verifyRowCount(table, ROWSIZE); + + table.close(); + + } + + @Test + public void testCleanMergeReference() throws Exception { + LOG.info("Starting testCleanMergeReference"); + master.setCatalogJanitorEnabled(false); + try { + final byte[] tableName = Bytes.toBytes("testCleanMergeReference"); + // Create table and load data. 
+ HTable table = createTableAndLoadData(master, tableName); + // Merge 1st and 2nd region + mergeRegionsAndVerifyRegionNum(master, tableName, 0, 1, + INITIAL_REGION_NUM - 1); + verifyRowCount(table, ROWSIZE); + table.close(); + + List> tableRegions = MetaReader + .getTableRegionsAndLocations(master.getCatalogTracker(), + Bytes.toString(tableName)); + HRegionInfo mergedRegionInfo = tableRegions.get(0).getFirst(); + HTableDescriptor tableDescritor = master.getTableDescriptors().get( + Bytes.toString(tableName)); + Result mergedRegionResult = MetaReader.getRegionResult( + master.getCatalogTracker(), mergedRegionInfo.getRegionName()); + + // contains merge reference in META + assertTrue(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.MERGEA_QUALIFIER) != null); + assertTrue(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.MERGEB_QUALIFIER) != null); + + // merging regions' directory are in the file system all the same + HRegionInfo regionA = MetaReader.parseHRegionInfoFromCatalogResult(mergedRegionResult, + HConstants.MERGEA_QUALIFIER); + HRegionInfo regionB = MetaReader.parseHRegionInfoFromCatalogResult(mergedRegionResult, + HConstants.MERGEB_QUALIFIER); + FileSystem fs = master.getMasterFileSystem().getFileSystem(); + Path rootDir = master.getMasterFileSystem().getRootDir(); + + Path tabledir = new Path(rootDir, mergedRegionInfo.getTableNameAsString()); + Path regionAdir = new Path(tabledir, regionA.getEncodedName()); + Path regionBdir = new Path(tabledir, regionB.getEncodedName()); + assertTrue(fs.exists(regionAdir)); + assertTrue(fs.exists(regionBdir)); + + admin.compact(mergedRegionInfo.getRegionName()); + // wait until merged region doesn't have reference file + long timeout = System.currentTimeMillis() + waitTime; +// HRegionFileSystem hrfs = new HRegionFileSystem( +// TEST_UTIL.getConfiguration(), fs, tabledir, mergedRegionInfo); + HRegion region = new HRegion(tabledir, null, fs, TEST_UTIL.getConfiguration(), + 
mergedRegionInfo, table.getTableDescriptor(), null); + + while (System.currentTimeMillis() < timeout) { + if (!region.hasReferences()) { + break; + } + Thread.sleep(50); + } + assertFalse(region.hasReferences()); + + // run CatalogJanitor to clean merge references in META and archive the + // files of merging regions + //HACK: get catalog janitor through master's private field + CatalogJanitor janitor = (CatalogJanitor)getField(master, "catalogJanitorChore"); + int cleaned = (Integer)invokeMethod(janitor, "scan", new Class[0], new Object[0]); + assertTrue(cleaned > 0); + assertFalse(fs.exists(regionAdir)); + assertFalse(fs.exists(regionBdir)); + + mergedRegionResult = MetaReader.getRegionResult( + master.getCatalogTracker(), mergedRegionInfo.getRegionName()); + assertFalse(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.MERGEA_QUALIFIER) != null); + assertFalse(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.MERGEB_QUALIFIER) != null); + + } finally { + master.setCatalogJanitorEnabled(true); + } + + + } + + /** + * Returns the value of a private or a protected field using reflection. + */ + private Object getField(Object obj, String fieldName) + throws SecurityException, NoSuchFieldException, IllegalArgumentException, IllegalAccessException { + Field field = obj.getClass().getDeclaredField(fieldName); + field.setAccessible(true); + return field.get(obj); + } + + /** + * Invokes the named function on the object with the given arguments. + */ + private Object invokeMethod(Object obj, String function, Class[] parameterTypes, Object... 
args) + throws SecurityException, NoSuchMethodException, IllegalArgumentException, + IllegalAccessException, InvocationTargetException { + Method method = obj.getClass().getDeclaredMethod(function, parameterTypes); + method.setAccessible(true); + return method.invoke(obj, args); + } + + private void mergeRegionsAndVerifyRegionNum(HMaster master, byte[] tablename, + int regionAnum, int regionBnum, int expectedRegionNum) throws Exception { + requestMergeRegion(master, tablename, regionAnum, regionBnum); + waitAndVerifyRegionNum(master, tablename, expectedRegionNum); + } + + private void requestMergeRegion(HMaster master, byte[] tablename, + int regionAnum, int regionBnum) throws Exception { + List> tableRegions = MetaReader + .getTableRegionsAndLocations(master.getCatalogTracker(), + Bytes.toString(tablename)); + TEST_UTIL.getHBaseAdmin().mergeRegions( + tableRegions.get(regionAnum).getFirst().getEncodedNameAsBytes(), + tableRegions.get(regionBnum).getFirst().getEncodedNameAsBytes(), false); + } + + private void waitAndVerifyRegionNum(HMaster master, byte[] tablename, + int expectedRegionNum) throws Exception { + List> tableRegionsInMeta; + List tableRegionsInMaster; + long timeout = System.currentTimeMillis() + waitTime; + while (System.currentTimeMillis() < timeout) { + tableRegionsInMeta = MetaReader.getTableRegionsAndLocations( + master.getCatalogTracker(), Bytes.toString(tablename)); + tableRegionsInMaster = master.getAssignmentManager() + .getRegionsOfTable(tablename); + if (tableRegionsInMeta.size() == expectedRegionNum + && tableRegionsInMaster.size() == expectedRegionNum) { + break; + } + Thread.sleep(250); + } + + tableRegionsInMeta = MetaReader.getTableRegionsAndLocations( + master.getCatalogTracker(), Bytes.toString(tablename)); + LOG.info("Regions after merge:" + Joiner.on(',').join(tableRegionsInMeta)); + assertEquals(expectedRegionNum, tableRegionsInMeta.size()); + } + + private HTable createTableAndLoadData(HMaster master, byte[] tablename) + throws 
Exception { + return createTableAndLoadData(master, tablename, INITIAL_REGION_NUM); + } + + private HTable createTableAndLoadData(HMaster master, byte[] tablename, + int numRegions) throws Exception { + assertTrue("ROWSIZE must > numregions:" + numRegions, ROWSIZE > numRegions); + byte[][] splitRows = new byte[numRegions - 1][]; + for (int i = 0; i < splitRows.length; i++) { + splitRows[i] = ROWS[(i + 1) * ROWSIZE / numRegions]; + } + + HTable table = TEST_UTIL.createTable(tablename, FAMILYNAME, splitRows); + loadData(table); + verifyRowCount(table, ROWSIZE); + + // sleep here is an ugly hack to allow region transitions to finish + long timeout = System.currentTimeMillis() + waitTime; + List> tableRegions; + while (System.currentTimeMillis() < timeout) { + tableRegions = MetaReader.getTableRegionsAndLocations( + master.getCatalogTracker(), Bytes.toString(tablename)); + if (tableRegions.size() == numRegions) + break; + Thread.sleep(250); + } + + tableRegions = MetaReader.getTableRegionsAndLocations( + master.getCatalogTracker(), Bytes.toString(tablename)); + LOG.info("Regions after load: " + Joiner.on(',').join(tableRegions)); + assertEquals(numRegions, tableRegions.size()); + return table; + } + + private static byte[][] makeN(byte[] base, int n) { + byte[][] ret = new byte[n][]; + for (int i = 0; i < n; i++) { + ret[i] = Bytes.add(base, Bytes.toBytes(String.format("%04d", i))); + } + return ret; + } + + private void loadData(HTable table) throws IOException { + for (int i = 0; i < ROWSIZE; i++) { + Put put = new Put(ROWS[i]); + put.add(FAMILYNAME, QUALIFIER, Bytes.toBytes(i)); + table.put(put); + } + } + + private void verifyRowCount(HTable table, int expectedRegionNum) + throws IOException { + ResultScanner scanner = table.getScanner(new Scan()); + int rowCount = 0; + while (scanner.next() != null) { + rowCount++; + } + assertEquals(expectedRegionNum, rowCount); + scanner.close(); + } + +} diff --git 
a/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java index b935cd2..b341435 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransaction.java @@ -72,7 +72,7 @@ public class TestSplitTransaction { private static final byte [] ENDROW = new byte [] {'{', '{', '{'}; private static final byte [] GOOD_SPLIT_ROW = new byte [] {'d', 'd', 'd'}; private static final byte [] CF = HConstants.CATALOG_FAMILY; - + @Before public void setup() throws IOException { this.fs = FileSystem.get(TEST_UTIL.getConfiguration()); this.fs.delete(this.testdir, true); @@ -163,6 +163,7 @@ public class TestSplitTransaction { storeFileList.add(storeFileMock); when(storeMock.getStorefiles()).thenReturn(storeFileList); when(storeMock.close()).thenReturn(ImmutableList.copyOf(storeFileList)); + when(storeMock.hasReferences()).thenReturn(true); this.parent.stores.put(Bytes.toBytes(""), storeMock); SplitTransaction st = new SplitTransaction(this.parent, GOOD_SPLIT_ROW);