diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index c01e722..e5e3c2f 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -188,7 +188,7 @@ public class MetaTableAccessor {
* @return An {@link HTable} for hbase:meta
* @throws IOException
*/
- static HTable getMetaHTable(final HConnection hConnection)
+ public static HTable getMetaHTable(final HConnection hConnection)
throws IOException {
return getHTable(hConnection, TableName.META_TABLE_NAME);
}
@@ -749,8 +749,15 @@ public class MetaTableAccessor {
if (replicaId < 0) {
break;
}
-
- locations.add(getRegionLocation(r, regionInfo, replicaId));
+ byte[] daughter = getDaughterReplicaQualifier(replicaId);
+ // If a daughter_ column is present, its value is the HRegionInfo of the (split)
+ // parent replica. Use that as the location of the daughter replica until the
+ // daughter replica is actually created and assigned.
+ HRegionInfo h = getHRegionInfo(r, daughter);
+ if (h == null) {
+ h = regionInfo;
+ }
+ locations.add(getRegionLocation(r, h, replicaId));
}
return new RegionLocations(locations);
@@ -1211,6 +1218,9 @@ public class MetaTableAccessor {
HRegionInfo copyOfParent = new HRegionInfo(parent);
copyOfParent.setOffline(true);
copyOfParent.setSplit(true);
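+ // Read the parent's current replica locations up front; they are used below to
+ // bootstrap the daughter replicas onto the same servers as the parent replicas.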
+ byte[] key = MetaTableAccessor.getMetaKeyForRegion(parent);
+ Result result = meta.get(new Get(key));
+ RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
//Put for parent
Put putParent = makePutFromRegionInfo(copyOfParent);
@@ -1222,6 +1232,24 @@ public class MetaTableAccessor {
addLocation(putA, sn, 1, splitA.getReplicaId()); //new regions, openSeqNum = 1 is fine.
addLocation(putB, sn, 1, splitB.getReplicaId());
+ // Bootstrap the daughter replicas to point to the locations of the old parent
+ // replicas via daughter_ columns.
+ for (int i = 1; i < rl.size(); i++) {
+ ServerName s;
+ byte[] parentHri;
+ if (rl.getRegionLocation(i) == null) { // nothing is known about this replica
+ continue;
+ } else {
+ s = rl.getRegionLocation(i).getServerName();
+ parentHri = rl.getRegionLocation(i).getRegionInfo().toByteArray();
+ }
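+ // Record the parent replica's last known server and seqNum against both daughter
+ // rows so the daughter replicas have a usable location until they are actually assigned.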
+ addLocation(putA, s, rl.getRegionLocation(i).getSeqNum(), i);
+ addLocation(putB, s, rl.getRegionLocation(i).getSeqNum(), i);
+ putA.addImmutable(HConstants.CATALOG_FAMILY,
+ MetaTableAccessor.getDaughterReplicaQualifier(i), parentHri);
+ putB.addImmutable(HConstants.CATALOG_FAMILY,
+ MetaTableAccessor.getDaughterReplicaQualifier(i), parentHri);
+ }
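+ // The parent row and both daughter rows are updated by the single atomic
+ // multi-mutate below.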
byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
multiMutate(meta, tableRow, putParent, putA, putB);
@@ -1229,6 +1257,18 @@ public class MetaTableAccessor {
meta.close();
}
}
+ /**
+ * Returns the column qualifier for the daughter column for the given replica.
+ * @param replicaId the replicaId of the region
+ * @return the daughter column qualifier as a byte[]
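+ * (for example, replica 0 maps to "daughter" and replica 1 to something like
+ * "daughter_0001", assuming the usual replica-id column suffix format)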
+ */
+ @VisibleForTesting
+ public static byte[] getDaughterReplicaQualifier(int replicaId) {
+ return replicaId == 0
+ ? HConstants.DAUGHTER_QUALIFIER
+ : Bytes.toBytes(HConstants.DAUGHTER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
+ + String.format(HRegionInfo.REPLICA_ID_FORMAT, replicaId));
+ }
/**
* Performs an atomic multi-Mutate operation against the given table.
@@ -1295,9 +1335,20 @@ public class MetaTableAccessor {
HRegionInfo regionInfo, ServerName sn, long openSeqNum)
throws IOException {
// region replicas are kept in the primary region's row
- Put put = new Put(getMetaKeyForRegion(regionInfo));
+ byte[] metaRow = getMetaKeyForRegion(regionInfo);
+ Put put = new Put(metaRow);
addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
- putToMetaTable(hConnection, put);
+ put.addImmutable(HConstants.CATALOG_FAMILY,
+ MetaTableAccessor.getServerColumn(regionInfo.getReplicaId()),
+ Bytes.toBytes(sn.getHostAndPort()));
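+ // For a non-default replica, clear its bootstrap daughter_ column in the same
+ // atomic meta mutation, now that the replica has a location of its own.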
+ if (regionInfo.getReplicaId() != 0) {
+ Delete d = new Delete(metaRow);
+ d.deleteColumn(HConstants.CATALOG_FAMILY,
+ MetaTableAccessor.getDaughterReplicaQualifier(regionInfo.getReplicaId()));
+ multiMutate(MetaTableAccessor.getMetaHTable(hConnection), metaRow, put, d);
+ } else {
+ putToMetaTable(hConnection, put);
+ }
LOG.info("Updated row " + regionInfo.getRegionNameAsString() +
" with server=" + sn);
}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index c2709f5..ce22915 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -425,6 +425,11 @@ public final class HConstants {
public static final byte [] SERVERNAME_QUALIFIER = Bytes.toBytes(SERVERNAME_QUALIFIER_STR);
+ /** The daughter column qualifier: while a daughter replica is pending assignment,
+  * this column holds the serialized HRegionInfo of the corresponding parent replica */
+ public static final String DAUGHTER_QUALIFIER_STR = "daughter";
+ /** The daughter column qualifier */
+ public static final byte [] DAUGHTER_QUALIFIER = Bytes.toBytes(DAUGHTER_QUALIFIER_STR);
+
/** The lower-half split region column qualifier */
public static final byte [] SPLITA_QUALIFIER = Bytes.toBytes("splitA");
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 2888e1e..5554c2b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -30,6 +30,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
+import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
@@ -3740,6 +3741,13 @@ public class AssignmentManager extends ZooKeeperListener {
}
if (et == EventType.RS_ZK_REGION_SPLIT) {
+ // split replicas
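+ // (best effort: an IOException here is only logged and SPLIT handling continues below)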
+ try {
+ doSplittingOfReplicas(rs_p.getRegion(), hri_a, hri_b,
+ ((MasterServices)server).getTableDescriptors().get(p.getTable()).getRegionReplication());
+ } catch (IOException e) {
+ LOG.warn("Failed to handle splits for replica regions " + e);
+ }
LOG.debug("Handling SPLIT event for " + encodedName + "; deleting node");
// Remove region from ZK
try {
@@ -3772,6 +3780,47 @@ public class AssignmentManager extends ZooKeeperListener {
return true;
}
+ private void doSplittingOfReplicas(final HRegionInfo parentHri, final HRegionInfo hri_a,
+ final HRegionInfo hri_b, final int numReplicas) {
+ // Create new regions for the replicas and assign them to match the current
+ // replica assignments: if replica 1 of the parent is on RS1, replica 1 of each
+ // daughter is placed on the same server.
+ Map<HRegionInfo, ServerName> map = new HashMap<HRegionInfo, ServerName>();
+ for (int i = 1; i < numReplicas; i++) {
+ prepareDaughterReplicaForAssignment(hri_a, parentHri, i, map);
+ prepareDaughterReplicaForAssignment(hri_b, parentHri, i, map);
+ }
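+ // Assign all daughter replicas in one call, using the placements chosen above.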
+ try {
+ assign(map);
+ } catch (IOException e) {
+ LOG.warn("Caught exception " + e + " while trying to assign replica(s) of daughter(s)");
+ } catch (InterruptedException e) {
+ LOG.warn("Caught exception " + e + " while trying to assign replica(s) of daughter(s)");
+ }
+ // Unassign the replicas of the now-split parent; they are no longer needed.
+ for (int i = 1; i < numReplicas; i++) {
+ HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(parentHri, i);
+ LOG.debug("Unassigning replica for split parent " + h);
+ unassign(RegionReplicaUtil.getRegionInfoForReplica(parentHri, i));
+ }
+ }
+
+ private void prepareDaughterReplicaForAssignment(HRegionInfo daughterHri, HRegionInfo parentHri,
+ int replicaId, Map<HRegionInfo, ServerName> map) {
+ HRegionInfo parentReplica = RegionReplicaUtil.getRegionInfoForReplica(parentHri, replicaId);
+ HRegionInfo daughterReplica = RegionReplicaUtil.getRegionInfoForReplica(daughterHri,
+ replicaId);
+ LOG.debug("Created replica region for daughter " + daughterReplica);
+ ServerName sn;
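+ // Prefer colocating the daughter replica with the parent replica's current server;
+ // otherwise fall back to a random online server.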
+ if ((sn = regionStates.getRegionServerOfRegion(parentReplica)) != null) {
+ map.put(daughterReplica, sn);
+ } else {
+ List<ServerName> servers = serverManager.getOnlineServersList();
+ sn = servers.get((new Random(System.currentTimeMillis())).nextInt(servers.size()));
+ map.put(daughterReplica, sn);
+ }
+ }
+
/**
* A region is offline. The new state should be the specified one,
* if not null. If the specified state is null, the new state is Offline.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
index 051002e..636c194 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
@@ -23,7 +23,11 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -43,6 +47,7 @@ import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.util.Bytes;
@@ -109,12 +114,13 @@ public class CatalogJanitor extends Chore {
/**
* Scans hbase:meta and returns a number of scanned rows, and a map of merged
* regions, and an ordered map of split parents.
- * @return triple of scanned rows, map of merged regions and map of split
- * parent regioninfos
+ * @return triple of scanned rows, map of merged regions, and a pair consisting of
+ * the map of split parent regioninfos and the set of parents to which daughters
+ * still hold back references [TODO: the signature is ugly; fix it]
* @throws IOException
*/
- Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents()
- throws IOException {
+ Triple, Pair