diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index a91f8e4..0b8b8b0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1369,6 +1369,10 @@ public class AssignmentManager implements ServerListener { return regionStates.getSnapShotOfAssignment(regions); } + public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment() { + return regionStates.getSnapShotOfAssignment(); + } + // ============================================================================================ // TODO: UTILS/HELPERS? // ============================================================================================ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index 26a6884..c47e9aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -501,6 +501,27 @@ public class RegionStates { return result; } + public Map<ServerName, List<RegionInfo>> getSnapShotOfAssignment() { + final Map<ServerName, List<RegionInfo>> result = new HashMap<ServerName, List<RegionInfo>>(); + for (RegionStateNode node : regionsMap.values()) { + if (node == null) continue; + + // TODO: State.OPEN + final ServerName serverName = node.getRegionLocation(); + if (serverName == null) continue; + + List<RegionInfo> serverRegions = result.get(serverName); + if (serverRegions == null) { + serverRegions = new ArrayList<RegionInfo>(); + result.put(serverName, serverRegions); + } + + serverRegions.add(node.getRegionInfo()); + } + return result; + } + + public Map<RegionInfo, ServerName> getRegionAssignments() { final HashMap<RegionInfo, ServerName> assignments = new HashMap<RegionInfo, ServerName>(); for (RegionStateNode node: regionsMap.values()) { diff 
--git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 51c2758..94d7e71 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -741,7 +741,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { int region = regionsToIndex.get(regionInfo); int primary = regionIndexToPrimaryIndex[region]; - // there is a subset relation for server < host < rack // check server first @@ -1317,6 +1316,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { rackManager); } + protected Cluster createClusterForRetain(List<ServerName> servers, + Collection<RegionInfo> regions) { + // Get the snapshot of the current assignments for the regions in question, and then create + // a cluster out of it. Note that we might have replicas already assigned to some servers + // earlier. So we want to get the snapshot to see those assignments, but this will only contain + // replicas of the regions that are passed (for performance). + Map<ServerName, List<RegionInfo>> clusterState = getRegionAssignmentsByServer(); + + for (ServerName server : servers) { + if (!clusterState.containsKey(server)) { + clusterState.put(server, EMPTY_REGION_LIST); + } + } + return new Cluster(regions, clusterState, null, this.regionFinder, rackManager); + } + private List<ServerName> findIdleServers(List<ServerName> servers) { return this.services.getServerManager() .getOnlineServersListWithPredicator(servers, IDLE_SERVER_PREDICATOR); @@ -1470,7 +1485,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { // If servers from prior assignment aren't present, then lets do randomAssignment on regions. 
if (randomAssignRegions.size() > 0) { - Cluster cluster = createCluster(servers, regions.keySet()); + Cluster cluster = createClusterForRetain(servers, regions.keySet()); for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) { ServerName sn = entry.getKey(); for (RegionInfo region : entry.getValue()) { @@ -1480,7 +1495,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { for (RegionInfo region : randomAssignRegions) { ServerName target = randomAssignment(cluster, region, servers); assignments.get(target).add(region); - cluster.doAssignRegion(region, target); numRandomAssignments++; } } @@ -1583,6 +1597,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } } + protected Map<ServerName, List<RegionInfo>> getRegionAssignmentsByServer() { + if (this.services != null && this.services.getAssignmentManager() != null) { + return this.services.getAssignmentManager().getSnapShotOfAssignment(); + } else { + return new HashMap<>(); + } + } + @Override public void onConfigurationChange(Configuration conf) { } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index b938d28..262462c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -1442,7 +1442,9 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { */ public Table createTable(TableName tableName, byte[][] families, byte[][] splitKeys, final Configuration c) throws IOException { - return createTable(new HTableDescriptor(tableName), families, splitKeys, c); + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.setRegionReplication(3); + return createTable(htd, families, splitKeys, c); } /** @@ -1744,6 +1746,7 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { .setMaxVersions(maxVersions); desc.addFamily(hcd); } + desc.setRegionReplication(3); return 
desc; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java index f7cc38a..a43f950 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java @@ -19,6 +19,10 @@ package org.apache.hadoop.hbase.master.procedure; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -27,14 +31,18 @@ import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.regionserver.Region; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.junit.After; import org.junit.Before; import org.junit.ClassRule; @@ -68,7 +76,7 @@ public class TestServerCrashProcedure { public void setup() throws Exception { this.util = new HBaseTestingUtility(); setupConf(this.util.getConfiguration()); - this.util.startMiniCluster(3); + 
this.util.startMiniCluster(4); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false); serverCrashProcMetrics = this.util.getHBaseCluster().getMaster().getMasterMetrics() @@ -92,12 +100,12 @@ public class TestServerCrashProcedure { testRecoveryAndDoubleExecution(false, false); } - @Test + //@Test public void testRecoveryAndDoubleExecutionOnRsWithMeta() throws Exception { testRecoveryAndDoubleExecution(true, true); } - @Test + //@Test public void testRecoveryAndDoubleExecutionOnRsWithoutMeta() throws Exception { testRecoveryAndDoubleExecution(false, true); } @@ -156,6 +164,20 @@ public class TestServerCrashProcedure { ProcedureTestingUtility.waitProcedure(procExec, procId); } // Assert all data came back. + List<RegionInfo> regionInfos = new ArrayList<>(); + for (RegionServerThread rs : this.util.getMiniHBaseCluster().getRegionServerThreads()) { + regionInfos.clear(); + for (Region r : rs.getRegionServer().getRegions(t.getName())) { + LOG.info("The region is " + r.getRegionInfo() + " the location is " + + rs.getRegionServer().getServerName()); + if (contains(regionInfos, r.getRegionInfo())) { + LOG.error("Am exiting"); + fail("Splitted regions should not be assigned to same region server"); + } else { + regionInfos.add(r.getRegionInfo()); + } + } + } assertEquals(count, util.countRows(t)); assertEquals(checksum, util.checksumRows(t)); } catch(Throwable throwable) { @@ -166,6 +188,15 @@ public class TestServerCrashProcedure { } } + private boolean contains(List<RegionInfo> regionInfos, RegionInfo regionInfo) { + for (RegionInfo info : regionInfos) { + if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) { + return true; + } + } + return false; + } + private void collectMasterMetrics() { serverCrashSubmittedCount = serverCrashProcMetrics.getSubmittedCounter().getCount(); serverCrashFailedCount = serverCrashProcMetrics.getFailedCounter().getCount();