diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
index 01ea8c1..c0c2789 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
@@ -186,7 +186,7 @@ public class MetaReader {
* @return An {@link HTable} for hbase:meta
* @throws IOException
*/
- static HTable getMetaHTable(final CatalogTracker ct)
+ public static HTable getMetaHTable(final CatalogTracker ct)
throws IOException {
return getHTable(ct, TableName.META_TABLE_NAME);
}
@@ -265,7 +265,7 @@ public class MetaReader {
}
/** Returns the row key to use for this regionInfo */
- protected static byte[] getMetaKeyForRegion(HRegionInfo regionInfo) {
+ public static byte[] getMetaKeyForRegion(HRegionInfo regionInfo) {
return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName();
}
@@ -612,7 +612,7 @@ public class MetaReader {
* Returns the column family used for meta columns.
* @return HConstants.CATALOG_FAMILY.
*/
- protected static byte[] getFamily() {
+ public static byte[] getFamily() {
return HConstants.CATALOG_FAMILY;
}
@@ -664,6 +664,19 @@ public class MetaReader {
}
/**
+ * Returns the column qualifier of the daughter column for the given replicaId
+ * @param replicaId the replicaId of the region
+ * @return a byte[] for daughter column qualifier
+ */
+ @VisibleForTesting
+ public static byte[] getDaughterReplicaQualifier(int replicaId) {
+ return replicaId == 0
+ ? HConstants.DAUGHTER_QUALIFIER
+ : Bytes.toBytes(HConstants.DAUGHTER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
+ + String.format(HRegionInfo.REPLICA_ID_FORMAT, replicaId));
+ }
+
+ /**
* Parses the replicaId from the server column qualifier. See top of the class javadoc
* for the actual meta layout
* @param serverColumn the column qualifier
@@ -744,8 +757,13 @@ public class MetaReader {
if (replicaId < 0) {
break;
}
+ byte[] daughter = getDaughterReplicaQualifier(replicaId);
+ HRegionInfo h = getHRegionInfo(r, daughter);
+ if (h == null) {
+ h = regionInfo;
+ }
- locations.add(getRegionLocation(r, regionInfo, replicaId));
+ locations.add(getRegionLocation(r, h, replicaId));
}
return new RegionLocations(locations);
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 0712469..00f582d 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -401,6 +401,11 @@ public final class HConstants {
/** The open seqnum column qualifier */
public static final byte [] SEQNUM_QUALIFIER = Bytes.toBytes(SEQNUM_QUALIFIER_STR);
+ /** The daughter qualifier */
+ public static final String DAUGHTER_QUALIFIER_STR = "daughter";
+ /** The daughter qualifier as a byte array */
+ public static final byte [] DAUGHTER_QUALIFIER = Bytes.toBytes(DAUGHTER_QUALIFIER_STR);
+
/** The lower-half split region column qualifier */
public static final byte [] SPLITA_QUALIFIER = Bytes.toBytes("splitA");
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
index e236d99..77270f3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
@@ -30,12 +30,15 @@ import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
+import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
@@ -383,6 +386,9 @@ public class MetaEditor extends MetaReader {
HRegionInfo copyOfParent = new HRegionInfo(parent);
copyOfParent.setOffline(true);
copyOfParent.setSplit(true);
+ byte[] key = MetaReader.getMetaKeyForRegion(parent);
+ Result result = meta.get(new Get(key));
+ RegionLocations rl = MetaReader.getRegionLocations(result);
//Put for parent
Put putParent = makePutFromRegionInfo(copyOfParent);
@@ -394,6 +400,21 @@ public class MetaEditor extends MetaReader {
addLocation(putA, sn, 1, splitA.getReplicaId()); //new regions, openSeqNum = 1 is fine.
addLocation(putB, sn, 1, splitB.getReplicaId());
+ // set the replicas to point to the locations of the old replicas
+ for (int i = 1; i < rl.size(); i++) {
+ ServerName s;
+ byte[] parentHri;
+ if (rl.getRegionLocation(i) == null) { // if null then don't know anything about replica
+ continue;
+ } else {
+ s = rl.getRegionLocation(i).getServerName();
+ parentHri = rl.getRegionLocation(i).getRegionInfo().toByteArray();
+ }
+ addLocation(putA, s, rl.getRegionLocation(i).getSeqNum(), i);
+ addLocation(putB, s, rl.getRegionLocation(i).getSeqNum(), i);
+ putA.addImmutable(HConstants.CATALOG_FAMILY, MetaReader.getDaughterReplicaQualifier(i), parentHri);
+ putB.addImmutable(HConstants.CATALOG_FAMILY, MetaReader.getDaughterReplicaQualifier(i), parentHri);
+ }
byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
multiMutate(meta, tableRow, putParent, putA, putB);
@@ -463,9 +484,20 @@ public class MetaEditor extends MetaReader {
HRegionInfo regionInfo, ServerName sn, long openSeqNum)
throws IOException {
// region replicas are kept in the primary region's row
- Put put = new Put(getMetaKeyForRegion(regionInfo));
+ byte[] metaRow = getMetaKeyForRegion(regionInfo);
+ Put put = new Put(metaRow);
addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
- putToCatalogTable(catalogTracker, put);
+ put.addImmutable(HConstants.CATALOG_FAMILY,
+ MetaReader.getServerColumn(regionInfo.getReplicaId()),
+ Bytes.toBytes(sn.getHostAndPort()));
+ if (regionInfo.getReplicaId() != 0) {
+ Delete d = new Delete(metaRow);
+ d.deleteColumn(HConstants.CATALOG_FAMILY,
+ MetaReader.getDaughterReplicaQualifier(regionInfo.getReplicaId()));
+ multiMutate(MetaReader.getMetaHTable(catalogTracker), metaRow, put, d);
+ } else {
+ putToCatalogTable(catalogTracker, put);
+ }
LOG.info("Updated row " + regionInfo.getRegionNameAsString() +
" with server=" + sn);
}
@@ -573,7 +605,7 @@ public class MetaEditor extends MetaReader {
return p;
}
- private static Put addLocation(final Put p, final ServerName sn, long openSeqNum, int replicaId){
+ public static Put addLocation(final Put p, final ServerName sn, long openSeqNum, int replicaId){
p.addImmutable(HConstants.CATALOG_FAMILY, MetaReader.getServerColumn(replicaId),
Bytes.toBytes(sn.getHostAndPort()));
p.addImmutable(HConstants.CATALOG_FAMILY, MetaReader.getStartCodeColumn(replicaId),
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index a7c3f69..99e3d2a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -30,6 +30,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
+import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
@@ -3555,6 +3556,13 @@ public class AssignmentManager extends ZooKeeperListener {
}
if (et == EventType.RS_ZK_REGION_SPLIT) {
+ // split replicas
+ try {
+ doSplittingOfReplicas(rs_p.getRegion(), hri_a, hri_b,
+ ((MasterServices)server).getTableDescriptors().get(p.getTable()).getRegionReplication());
+ } catch (IOException e) {
+ LOG.warn("Failed to handle splits for replica regions " + e);
+ }
LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
// Remove region from ZK
try {
@@ -3586,6 +3594,47 @@ public class AssignmentManager extends ZooKeeperListener {
return true;
}
+ private void doSplittingOfReplicas(final HRegionInfo parentHri, final HRegionInfo hri_a,
+ final HRegionInfo hri_b, final int numReplicas) {
+ // create new regions for the replica, and assign them to match with the
+ // current replica assignments. If replica1 of parent is assigned to RS1,
+ // the replica1s of daughters will be on the same machine
+ Map map = new HashMap();
+ for (int i = 1; i < numReplicas; i++) {
+ prepareDaughterReplicaForAssignment(hri_a, parentHri, i, map);
+ prepareDaughterReplicaForAssignment(hri_b, parentHri, i, map);
+ }
+ try {
+ assign(map);
+ } catch (IOException e) {
+ LOG.warn("Caught exception " + e + " while trying to assign replica(s) of daughter(s)");
+ } catch (InterruptedException e) {
+ LOG.warn("Caught exception " + e + " while trying to assign replica(s) of daughter(s)");
+ }
+ // unassign the old replicas
+ for (int i = 1; i < numReplicas; i++) {
+ HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(parentHri, i);
+ LOG.debug("Unassigning replica for split parent " + h);
+ unassign(RegionReplicaUtil.getRegionInfoForReplica(parentHri, i));
+ }
+ }
+
+ private void prepareDaughterReplicaForAssignment(HRegionInfo daughterHri, HRegionInfo parentHri,
+ int replicaId, Map map) {
+ HRegionInfo parentReplica = RegionReplicaUtil.getRegionInfoForReplica(parentHri, replicaId);
+ HRegionInfo daughterReplica = RegionReplicaUtil.getRegionInfoForReplica(daughterHri,
+ replicaId);
+ LOG.debug("Created replica region for daughter " + daughterReplica);
+ ServerName sn;
+ if ((sn = regionStates.getRegionServerOfRegion(parentReplica)) != null) {
+ map.put(daughterReplica, sn);
+ } else {
+ List servers = serverManager.getOnlineServersList();
+ sn = servers.get((new Random(System.currentTimeMillis())).nextInt(servers.size()));
+ map.put(daughterReplica, sn);
+ }
+ }
+
/**
* A region is offline. The new state should be the specified one,
* if not null. If the specified state is null, the new state is Offline.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index 59bc01e..0207029 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -23,7 +23,11 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -44,6 +48,7 @@ import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.util.Bytes;
@@ -110,12 +115,13 @@ public class CatalogJanitor extends Chore {
/**
* Scans hbase:meta and returns a number of scanned rows, and a map of merged
* regions, and an ordered map of split parents.
- * @return triple of scanned rows, map of merged regions and map of split
- * parent regioninfos
+ * @return triple of scanned rows, map of merged regions and a pair consisting of
+ * map of split parent regioninfos and the set of parents to which daughters still have
+ * back references [TODO: the signature is ugly; fix it]
* @throws IOException
*/
- Triple, Map> getMergedRegionsAndSplitParents()
- throws IOException {
+ Triple, Pair