diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java index 182b34f..fc30cbc 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java @@ -629,7 +629,8 @@ public class MetaReader { * @param replicaId the replicaId of the region * @return a byte[] for server column qualifier */ - protected static byte[] getServerColumn(int replicaId) { + @VisibleForTesting + public static byte[] getServerColumn(int replicaId) { return replicaId == 0 ? HConstants.SERVER_QUALIFIER : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER @@ -641,7 +642,8 @@ public class MetaReader { * @param replicaId the replicaId of the region * @return a byte[] for server start code column qualifier */ - protected static byte[] getStartCodeColumn(int replicaId) { + @VisibleForTesting + public static byte[] getStartCodeColumn(int replicaId) { return replicaId == 0 ? HConstants.STARTCODE_QUALIFIER : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER @@ -653,7 +655,8 @@ public class MetaReader { * @param replicaId the replicaId of the region * @return a byte[] for seqNum column qualifier */ - protected static byte[] getSeqNumColumn(int replicaId) { + @VisibleForTesting + public static byte[] getSeqNumColumn(int replicaId) { return replicaId == 0 ? 
HConstants.SEQNUM_QUALIFIER : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index 7ada60f..bec95aa 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -39,6 +39,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseIOException; @@ -494,7 +495,7 @@ public class HBaseAdmin implements Abortable, Closeable { } catch (SocketTimeoutException ste) { LOG.warn("Creating " + desc.getTableName() + " took too long", ste); } - int numRegs = splitKeys == null ? 1 : splitKeys.length + 1; + int numRegs = (splitKeys == null ? 
1 : splitKeys.length + 1) * desc.getRegionReplication(); int prevRegCount = 0; boolean doneWithMetaScan = false; for (int tries = 0; tries < this.numRetries * this.retryLongerMultiplier; @@ -505,19 +506,27 @@ public class HBaseAdmin implements Abortable, Closeable { MetaScannerVisitor visitor = new MetaScannerVisitorBase() { @Override public boolean processRow(Result rowResult) throws IOException { - HRegionInfo info = HRegionInfo.getHRegionInfo(rowResult); - if (info == null) { + RegionLocations list = MetaReader.getRegionLocations(rowResult); + if (list == null) { LOG.warn("No serialized HRegionInfo in " + rowResult); return true; } - if (!info.getTable().equals(desc.getTableName())) { + HRegionLocation l = list.getRegionLocation(); + if (l == null) { + return true; + } + if (!l.getRegionInfo().getTable().equals(desc.getTableName())) { return false; } - ServerName serverName = HRegionInfo.getServerName(rowResult); - // Make sure that regions are assigned to server - if (!(info.isOffline() || info.isSplit()) && serverName != null - && serverName.getHostAndPort() != null) { - actualRegCount.incrementAndGet(); + if (l.getRegionInfo().isOffline() || l.getRegionInfo().isSplit()) return true; + HRegionLocation[] locations = list.getRegionLocations(); + for (HRegionLocation location : locations) { + if (location == null) continue; + ServerName serverName = location.getServerName(); + // Make sure that regions are assigned to server + if (serverName != null && serverName.getHostAndPort() != null) { + actualRegCount.incrementAndGet(); + } } return true; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java index abe9bf5..1e85993 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java @@ -62,4 +62,8 @@ public class 
RegionReplicaUtil { return getRegionInfoForReplica(regionInfo, DEFAULT_REPLICA_ID); } + /** @return true if this region is a default replica for the region */ + public static boolean isDefaultReplica(HRegionInfo hri) { + return hri.getReplicaId() == DEFAULT_REPLICA_ID; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java index 5b5446b..ce200fa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType; @@ -270,7 +271,9 @@ public class MetaEditor extends MetaReader { throws IOException { List puts = new ArrayList(); for (HRegionInfo regionInfo : regionInfos) { - puts.add(makePutFromRegionInfo(regionInfo)); + if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { + puts.add(makePutFromRegionInfo(regionInfo)); + } } putsToMetaTable(catalogTracker, puts); LOG.info("Added " + puts.size()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index dc2f93a..db05eb0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.HBaseIOException; import 
org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionTransition; import org.apache.hadoop.hbase.Server; @@ -56,6 +58,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.executor.EventHandler; @@ -2548,19 +2551,50 @@ public class AssignmentManager extends ZooKeeperListener { boolean retainAssignment = server.getConfiguration(). getBoolean("hbase.master.startup.retainassign", true); + Set regionsFromMetaScan = allRegions.keySet(); if (retainAssignment) { assign(allRegions); } else { - List regions = new ArrayList(allRegions.keySet()); + List regions = new ArrayList(regionsFromMetaScan); assign(regions); } - for (HRegionInfo hri : allRegions.keySet()) { + for (HRegionInfo hri : regionsFromMetaScan) { TableName tableName = hri.getTable(); if (!zkTable.isEnabledTable(tableName)) { setEnabledTable(tableName); } } + // assign all the replicas that were not recorded in the meta + assign(replicaRegionsNotRecordedInMeta(regionsFromMetaScan, (MasterServices)server)); + } + + /** + * Get a list of replica regions that are: + * not recorded in meta yet. We might not have recorded the locations + * for the replicas since the replicas may not have been online yet, master restarted + * in the middle of assigning, ZK erased, etc. 
+ * @param regionsRecordedInMeta the list of regions we know are recorded in meta + * either as a default, or, as the location of a replica + * @param master + * @return list of replica regions + * @throws IOException + */ + public static List replicaRegionsNotRecordedInMeta( + Set regionsRecordedInMeta, MasterServices master)throws IOException { + List regionsNotRecordedInMeta = new ArrayList(); + for (HRegionInfo hri : regionsRecordedInMeta) { + TableName table = hri.getTable(); + HTableDescriptor htd = master.getTableDescriptors().get(table); + // look at the HTD for the replica count. That's the source of truth + int desiredRegionReplication = htd.getRegionReplication(); + for (int i = 0; i < desiredRegionReplication; i++) { + HRegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hri, i); + if (regionsRecordedInMeta.contains(replica)) continue; + regionsNotRecordedInMeta.add(replica); + } + } + return regionsNotRecordedInMeta; } /** @@ -2612,62 +2646,66 @@ public class AssignmentManager extends ZooKeeperListener { new TreeMap>(); // Iterate regions in META for (Result result : results) { - Pair region = HRegionInfo.getHRegionInfoAndServerName(result); - if (region == null) continue; - HRegionInfo regionInfo = region.getFirst(); - ServerName regionLocation = region.getSecond(); - if (regionInfo == null) continue; - regionStates.createRegionState(regionInfo); - if (regionStates.isRegionInState(regionInfo, State.SPLIT)) { - // Split is considered to be completed. If the split znode still - // exists, the region will be put back to SPLITTING state later - LOG.debug("Region " + regionInfo.getRegionNameAsString() - + " split is completed. Hence need not add to regions list"); - continue; - } - TableName tableName = regionInfo.getTable(); - if (regionLocation == null) { - // regionLocation could be null if createTable didn't finish properly. - // When createTable is in progress, HMaster restarts. 
- // Some regions have been added to hbase:meta, but have not been assigned. - // When this happens, the region's table must be in ENABLING state. - // It can't be in ENABLED state as that is set when all regions are - // assigned. - // It can't be in DISABLING state, because DISABLING state transitions - // from ENABLED state when application calls disableTable. - // It can't be in DISABLED state, because DISABLED states transitions - // from DISABLING state. - if (!enablingTables.contains(tableName)) { - LOG.warn("Region " + regionInfo.getEncodedName() + - " has null regionLocation." + " But its table " + tableName + - " isn't in ENABLING state."); - } - } else if (!onlineServers.contains(regionLocation)) { - // Region is located on a server that isn't online - List offlineRegions = offlineServers.get(regionLocation); - if (offlineRegions == null) { - offlineRegions = new ArrayList(1); - offlineServers.put(regionLocation, offlineRegions); - } - offlineRegions.add(regionInfo); - // need to enable the table if not disabled or disabling or enabling - // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); - } - } else { - // Region is being served and on an active server - // add only if region not in disabled or enabling table - if (!disabledOrEnablingTables.contains(tableName)) { - regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation); - regionStates.regionOnline(regionInfo, regionLocation); + HRegionInfo regionInfo; + HRegionLocation[] locations = MetaReader.getRegionLocations(result).getRegionLocations(); + if (locations == null) continue; + // Do the operations for all the replicas + for (HRegionLocation hrl : locations) { + if (hrl == null) continue; + ServerName regionLocation = hrl.getServerName(); + regionInfo = hrl.getRegionInfo(); + regionStates.createRegionState(regionInfo); + if 
(regionStates.isRegionInState(regionInfo, State.SPLIT)) { + // Split is considered to be completed. If the split znode still + // exists, the region will be put back to SPLITTING state later + LOG.debug("Region " + regionInfo.getRegionNameAsString() + + " split is completed. Hence need not add to regions list"); + continue; } - // need to enable the table if not disabled or disabling or enabling - // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + TableName tableName = regionInfo.getTable(); + if (regionLocation == null) { + // regionLocation could be null if createTable didn't finish properly. + // When createTable is in progress, HMaster restarts. + // Some regions have been added to hbase:meta, but have not been assigned. + // When this happens, the region's table must be in ENABLING state. + // It can't be in ENABLED state as that is set when all regions are + // assigned. + // It can't be in DISABLING state, because DISABLING state transitions + // from ENABLED state when application calls disableTable. + // It can't be in DISABLED state, because DISABLED states transitions + // from DISABLING state. + if (!enablingTables.contains(tableName)) { + LOG.warn("Region " + regionInfo.getEncodedName() + + " has null regionLocation." 
+ " But its table " + tableName + + " isn't in ENABLING state."); + } + } else if (!onlineServers.contains(regionLocation)) { + // Region is located on a server that isn't online + List offlineRegions = offlineServers.get(regionLocation); + if (offlineRegions == null) { + offlineRegions = new ArrayList(1); + offlineServers.put(regionLocation, offlineRegions); + } + offlineRegions.add(regionInfo); + // need to enable the table if not disabled or disabling or enabling + // this will be used in rolling restarts + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } + } else { + // Region is being served and on an active server + // add only if region not in disabled or enabling table + if (!disabledOrEnablingTables.contains(tableName)) { + regionStates.updateRegionState(regionInfo, State.OPEN, regionLocation); + regionStates.regionOnline(regionInfo, regionLocation); + } + // need to enable the table if not disabled or disabling or enabling + // this will be used in rolling restarts + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } } @@ -3517,4 +3555,9 @@ public class AssignmentManager extends ZooKeeperListener { public LoadBalancer getBalancer() { return this.balancer; } + + public Pair>, Map>> + getSnapShotOfAssignment(List infos) { + return getRegionStates().getRegionAssignments(infos); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 9a01587..31d2a7e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1786,18 +1786,30 @@ MasterServices, Server { private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor, byte[][] splitKeys) { HRegionInfo[] 
hRegionInfos = null; + int numRegionReplicas = hTableDescriptor.getRegionReplication(); + if (numRegionReplicas <= 0) { + LOG.warn("Invalid number of replicas per region in the table descriptor. Setting it to 1."); + numRegionReplicas = 1; + } + long regionId = System.currentTimeMillis(); if (splitKeys == null || splitKeys.length == 0) { - hRegionInfos = new HRegionInfo[]{ - new HRegionInfo(hTableDescriptor.getTableName(), null, null)}; + hRegionInfos = new HRegionInfo[numRegionReplicas]; + for (int i = 0; i < numRegionReplicas; i++) { + hRegionInfos[i] = new HRegionInfo(hTableDescriptor.getTableName(), null, null, + false, regionId, (short)i); + } } else { int numRegions = splitKeys.length + 1; - hRegionInfos = new HRegionInfo[numRegions]; + hRegionInfos = new HRegionInfo[numRegions * numRegionReplicas]; byte[] startKey = null; byte[] endKey = null; for (int i = 0; i < numRegions; i++) { endKey = (i == splitKeys.length) ? null : splitKeys[i]; - hRegionInfos[i] = - new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey); + for (int j = 0; j < numRegionReplicas; j++) { + hRegionInfos[i*numRegionReplicas + j] = + new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey, + false, regionId, (short)j); + } startKey = endKey; } } @@ -3202,5 +3214,4 @@ MasterServices, Server { } return tableNames; } - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java index 782e04e..0f6737b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -66,4 +67,20 @@ public class RackManager { return UNKNOWN_RACK; } + + /** + * Same as {@link #getRack(ServerName)} except that a list is passed + * @param servers 
+ * @return + */ + public List getRack(List servers) { + // just a note - switchMapping caches results (at least the implementation should unless the + // resolution is really a lightweight process) + List serversAsString = new ArrayList(servers.size()); + for (ServerName server : servers) { + serversAsString.add(server.getHostname()); + } + List racks = switchMapping.resolve(serversAsString); + return racks; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index eaa57fc..23fb435 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; @@ -75,6 +76,12 @@ public class RegionStates { private final Map> serverHoldings; /** + * Maintains for each region (includes default and all replicas) the set of servers + * it is currently hosted in. + */ + private final Map> regionReplicaToServer; + + /** * Region to server assignment map. * Contains the server a given region is currently assigned to. 
*/ @@ -111,6 +118,7 @@ public class RegionStates { private final ServerManager serverManager; private final Server server; + private final RackManager rackManager; // The maximum time to keep a log split info in region states map static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime"; @@ -120,12 +128,14 @@ public class RegionStates { regionStates = new HashMap(); regionsInTransition = new HashMap(); serverHoldings = new HashMap>(); + regionReplicaToServer = new HashMap>(); regionAssignments = new TreeMap(); lastAssignments = new HashMap(); processedServers = new HashMap(); deadServers = new HashMap(); this.serverManager = serverManager; this.server = master; + this.rackManager = new RackManager(master.getConfiguration()); } /** @@ -136,6 +146,54 @@ public class RegionStates { return (Map)regionAssignments.clone(); } + /** + * Return the replicas for the regions grouped by ServerName and corresponding Racks + * @param regions + * @return a pair containing the groupings as Maps + */ + synchronized Pair>, Map>> + getRegionAssignments(List regions) { + Map> replicaAssignments = + new TreeMap>(); + Map> rackAssignments = + new TreeMap>(); + List servers = new ArrayList(); + + //what is being computed in the method below can also be maintained inline + //(in addToServerHoldings/removeFromServerHoldings) and on request, cloned and + //returned (just like getRegionAssignments() does), but in practice this + //method will be called with only a few regions and shouldn't be a big deal. Clone + //might be more expensive. 
+ for (HRegionInfo region : regions) { + HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(region); + Set serversHostingRegionReplicas = regionReplicaToServer.get(defaultReplica); + if (serversHostingRegionReplicas != null) { + for (ServerName server : serversHostingRegionReplicas) { + Set regionsOnServer = replicaAssignments.get(server); + if (regionsOnServer == null) { + regionsOnServer = new HashSet(2); + replicaAssignments.put(server, regionsOnServer); + } + regionsOnServer.add(defaultReplica); + servers.add(server); + } + } + } + + List racks = rackManager.getRack(servers); + for (int i = 0; i < servers.size(); i++) { + Set r = replicaAssignments.get(servers.get(i)); + Set regionsOnRack = rackAssignments.get(racks.get(i)); + if (regionsOnRack == null) { + regionsOnRack = new HashSet(); + rackAssignments.put(racks.get(i), regionsOnRack); + } + regionsOnRack.addAll(r); + } + return new Pair>, + Map>>(replicaAssignments, rackAssignments); + } + public synchronized ServerName getRegionServerOfRegion(HRegionInfo hri) { return regionAssignments.get(hri); } @@ -375,23 +433,46 @@ public class RegionStates { ServerName oldServerName = regionAssignments.put(hri, serverName); if (!serverName.equals(oldServerName)) { LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName); - Set regions = serverHoldings.get(serverName); - if (regions == null) { - regions = new HashSet(); - serverHoldings.put(serverName, regions); - } - regions.add(hri); + addToServerHoldings(serverName, hri); if (oldServerName != null) { LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName); - Set oldRegions = serverHoldings.get(oldServerName); - oldRegions.remove(hri); - if (oldRegions.isEmpty()) { - serverHoldings.remove(oldServerName); - } + removeFromServerHoldings(oldServerName, hri); } } } + private void addToServerHoldings(ServerName serverName, HRegionInfo hri) { + Set regions = serverHoldings.get(serverName); + if (regions == null) { + 
regions = new HashSet(); + serverHoldings.put(serverName, regions); + } + regions.add(hri); + + HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri); + Set serversHostingReplicaOfRegion = + regionReplicaToServer.get(defaultReplica); + if (serversHostingReplicaOfRegion == null) { + serversHostingReplicaOfRegion = new HashSet(); + regionReplicaToServer.put(defaultReplica, serversHostingReplicaOfRegion); + } + serversHostingReplicaOfRegion.add(serverName); + } + + private void removeFromServerHoldings(ServerName serverName, HRegionInfo hri) { + Set oldRegions = serverHoldings.get(serverName); + oldRegions.remove(hri); + if (oldRegions.isEmpty()) { + serverHoldings.remove(serverName); + } + HRegionInfo defaultReplica = RegionReplicaUtil.getRegionInfoForDefaultReplica(hri); + Set servers = regionReplicaToServer.get(defaultReplica); + servers.remove(serverName); + if (servers.isEmpty()) { + regionReplicaToServer.remove(defaultReplica); + } + } + /** * A dead server's hlogs have been split so that all the regions * used to be open on it can be safely assigned now. Mark them assignable. 
@@ -459,11 +540,7 @@ public class RegionStates { ServerName oldServerName = regionAssignments.remove(hri); if (oldServerName != null) { LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName); - Set oldRegions = serverHoldings.get(oldServerName); - oldRegions.remove(hri); - if (oldRegions.isEmpty()) { - serverHoldings.remove(oldServerName); - } + removeFromServerHoldings(oldServerName, hri); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java index b98c860..2738d4b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java @@ -33,11 +33,14 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.catalog.MetaReader.Visitor; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan; @@ -100,20 +103,31 @@ public class SnapshotOfRegionAssignmentFromMeta { public boolean visit(Result result) throws IOException { try { if (result == null || result.isEmpty()) return true; - Pair regionAndServer = - HRegionInfo.getHRegionInfoAndServerName(result); - HRegionInfo hri = regionAndServer.getFirst(); - if (hri == 
null) return true; + HRegionInfo hri = HRegionInfo.getHRegionInfo(result); + if (hri == null) return true; if (hri.getTable() == null) return true; if (disabledTables.contains(hri.getTable())) { return true; } // Are we to include split parents in the list? if (excludeOfflinedSplitParents && hri.isSplit()) return true; - // Add the current assignment to the snapshot - addAssignment(hri, regionAndServer.getSecond()); - addRegion(hri); - + RegionLocations locations = MetaReader.getRegionLocations(result); + HRegionLocation[] hrl = locations.getRegionLocations(); + + // Add the current assignment to the snapshot for all replicas + if (hrl != null) { + for (int i = 0; i < hrl.length; i++) { + hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i); + addAssignment(hri, hrl[i].getServerName()); + addRegion(hri); + } + } else { + // add a 'null' assignment. Required for map.keyset operation on the + // return value from getRegionToRegionServerMap. The keyset should + // still contain the hri although the region is presently not assigned + addAssignment(hri, null); + addRegion(hri); + } // the code below is to handle favored nodes byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY, FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java index b9084b9..dc7c653 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java @@ -27,7 +27,6 @@ import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import 
java.util.concurrent.TimeUnit; @@ -39,6 +38,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.regionserver.HRegion; /** @@ -111,7 +111,13 @@ public abstract class ModifyRegionUtils { CompletionService completionService = new ExecutorCompletionService( regionOpenAndInitThreadPool); List regionInfos = new ArrayList(); + int defaultReplicas = 0; for (final HRegionInfo newRegion : newRegions) { + regionInfos.add(newRegion); + if (!RegionReplicaUtil.isDefaultReplica(newRegion)) { + continue; + } + defaultReplicas++; completionService.submit(new Callable() { @Override public HRegionInfo call() throws IOException { @@ -121,10 +127,8 @@ public abstract class ModifyRegionUtils { } try { // wait for all regions to finish creation - for (int i = 0; i < regionNumber; i++) { - Future future = completionService.take(); - HRegionInfo regionInfo = future.get(); - regionInfos.add(regionInfo); + for (int i = 0; i < defaultReplicas; i++) { + completionService.take().get(); } } catch (InterruptedException e) { LOG.error("Caught " + e + " during region creation"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java new file mode 100644 index 0000000..8f51c51 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java @@ -0,0 +1,328 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.RegionLocations;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.catalog.CatalogTracker;
+import org.apache.hadoop.hbase.catalog.MetaReader;
+import org.apache.hadoop.hbase.catalog.MetaReader.Visitor;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestMasterOperationsForRegionReplicas {
+  final static Log LOG = LogFactory.getLog(TestMasterOperationsForRegionReplicas.class);
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static HBaseAdmin admin;
+  private static int numSlaves = 2;
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    conf.setBoolean("hbase.tests.use.shortcircuit.reads", false);
+    TEST_UTIL.startMiniCluster(numSlaves);
+    admin = new HBaseAdmin(conf);
+    while(admin.getClusterStatus().getServers().size() != numSlaves) {
+      Thread.sleep(100);
+    }
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testCreateTableWithSingleReplica() throws Exception {
+    final int numRegions = 3;
+    final int numReplica = 1;
+    final TableName table = TableName.valueOf("singleReplicaTable");
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.setRegionReplication(numReplica);
+      desc.addFamily(new HColumnDescriptor("family"));
+      admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), numRegions);
+
+      CatalogTracker ct = new CatalogTracker(TEST_UTIL.getConfiguration());
+      validateNumberOfRowsInMeta(table, numRegions, ct);
+      List<HRegionInfo> hris = MetaReader.getTableRegions(ct, table);
+      assert(hris.size() == numRegions * numReplica);
+    } finally {
+      admin.disableTable(table);
+      admin.deleteTable(table);
+    }
+  }
+
+  @Test
+  public void testCreateTableWithMultipleReplicas() throws Exception {
+    final TableName table = TableName.valueOf("fooTable");
+    final int numRegions = 3;
+    final int numReplica = 2;
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.setRegionReplication(numReplica);
+      desc.addFamily(new HColumnDescriptor("family"));
+      admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), numRegions);
+      TEST_UTIL.waitTableEnabled(table.getName());
+      CatalogTracker ct = new CatalogTracker(TEST_UTIL.getConfiguration());
+      validateNumberOfRowsInMeta(table, numRegions, ct);
+
+      List<HRegionInfo> hris = MetaReader.getTableRegions(ct, table);
+      assert(hris.size() == numRegions * numReplica);
+      // check that the master created expected number of RegionState objects
+      for (int i = 0; i < numRegions; i++) {
+        for (int j = 0; j < numReplica; j++) {
+          HRegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
+          RegionState state = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+              .getRegionStates().getRegionState(replica);
+          assert (state != null);
+        }
+      }
+      // TODO: HBASE-10351 should uncomment the following tests (since the tests assume region placements are handled)
+//      List<Result> metaRows = MetaReader.fullScan(ct);
+//      int numRows = 0;
+//      for (Result result : metaRows) {
+//        RegionLocations locations = MetaReader.getRegionLocations(result);
+//        HRegionInfo hri = locations.getRegionLocation().getRegionInfo();
+//        if (!hri.getTable().equals(table)) continue;
+//        numRows += 1;
+//        HRegionLocation[] servers = locations.getRegionLocations();
+//        // have two locations for the replicas of a region, and the locations should be different
+//        assert(servers.length == 2);
+//        assert(!servers[0].equals(servers[1]));
+//      }
+//      assert(numRows == numRegions);
+//
+//      // The same verification of the meta as above but with the SnapshotOfRegionAssignmentFromMeta
+//      // class
+//      validateFromSnapshotFromMeta(table, numRegions, numReplica, ct);
+//
+//      // Now kill the master, restart it and see if the assignments are kept
+//      ServerName master = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster();
+//      TEST_UTIL.getHBaseClusterInterface().stopMaster(master);
+//      TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(master, 30000);
+//      TEST_UTIL.getHBaseClusterInterface().startMaster(master.getHostname());
+//      TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
+//      for (int i = 0; i < numRegions; i++) {
+//        for (int j = 0; j < numReplica; j++) {
+//          HRegionInfo replica = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j);
+//          RegionState state = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+//              .getRegionStates().getRegionState(replica);
+//          assert (state != null);
+//        }
+//      }
+//      validateFromSnapshotFromMeta(table, numRegions, numReplica, ct);
+//
+//      // Now shut the whole cluster down, and verify the assignments are kept so that the
+//      // availability constraints are met.
+//      TEST_UTIL.getConfiguration().setBoolean("hbase.master.startup.retainassign", true);
+//      TEST_UTIL.shutdownMiniHBaseCluster();
+//      TEST_UTIL.startMiniHBaseCluster(1, numSlaves);
+//      TEST_UTIL.waitTableEnabled(table.getName());
+//      ct = new CatalogTracker(TEST_UTIL.getConfiguration());
+//      validateFromSnapshotFromMeta(table, numRegions, numReplica, ct);
+//
+//      // Now shut the whole cluster down, and verify regions are assigned even if there is only
+//      // one server running
+//      TEST_UTIL.shutdownMiniHBaseCluster();
+//      TEST_UTIL.startMiniHBaseCluster(1, 1);
+//      TEST_UTIL.waitTableEnabled(table.getName());
+//      ct = new CatalogTracker(TEST_UTIL.getConfiguration());
+//      validateSingleRegionServerAssignment(ct, numRegions, numReplica);
+//      for (int i = 1; i < numSlaves; i++) { //restore the cluster
+//        TEST_UTIL.getMiniHBaseCluster().startRegionServer();
+//      }
+
+      //TODO: HBASE-10361 patch should uncomment the test below
+//      //check on alter table
+//      admin.disableTable(table);
+//      assert(admin.isTableDisabled(table));
+//      //increase the replica
+//      desc.setRegionReplication(numReplica + 1);
+//      admin.modifyTable(table, desc);
+//      admin.enableTable(table);
+//      assert(admin.isTableEnabled(table));
+//      List<HRegionInfo> regions = TEST_UTIL.getMiniHBaseCluster().getMaster()
+//          .getAssignmentManager().getRegionStates().getRegionsOfTable(table);
+//      assert(regions.size() == numRegions * (numReplica + 1));
+//
+//      //decrease the replica(earlier, table was modified to have a replica count of numReplica + 1)
+//      admin.disableTable(table);
+//      desc.setRegionReplication(numReplica);
+//      admin.modifyTable(table, desc);
+//      admin.enableTable(table);
+//      assert(admin.isTableEnabled(table));
+//      regions = TEST_UTIL.getMiniHBaseCluster().getMaster()
+//          .getAssignmentManager().getRegionStates().getRegionsOfTable(table);
+//      assert(regions.size() == numRegions * numReplica);
+//      //also make sure the meta table has the replica locations removed
+//      hris = MetaReader.getTableRegions(ct, table);
+//      assert(hris.size() == numRegions * numReplica);
+//      //just check that the number of default replica regions in the meta table are the same
+//      //as the number of regions the table was created with, and the count of the
+//      //replicas is numReplica for each region
+//      Map<HRegionInfo, Integer> defaultReplicas = new HashMap<HRegionInfo, Integer>();
+//      for (HRegionInfo hri : hris) {
+//        Integer i;
+//        HRegionInfo regionReplica0 = hri.getRegionInfoForReplica(0);
+//        defaultReplicas.put(regionReplica0,
+//            (i = defaultReplicas.get(regionReplica0)) == null ? 1 : i + 1);
+//      }
+//      assert(defaultReplicas.size() == numRegions);
+//      Collection<Integer> counts = new HashSet<Integer>(defaultReplicas.values());
+//      assert(counts.size() == 1 && counts.contains(new Integer(numReplica)));
+    } finally {
+      admin.disableTable(table);
+      admin.deleteTable(table);
+    }
+  }
+
+  //@Test (TODO: enable when we have support for alter_table- HBASE-10361).
+  public void testIncompleteMetaTableReplicaInformation() throws Exception {
+    final TableName table = TableName.valueOf("fooTableTest1");
+    final int numRegions = 3;
+    final int numReplica = 2;
+    try {
+      // Create a table and let the meta table be updated with the location of the
+      // region locations.
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.setRegionReplication(numReplica);
+      desc.addFamily(new HColumnDescriptor("family"));
+      admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), numRegions);
+      TEST_UTIL.waitTableEnabled(table.getName());
+      CatalogTracker ct = new CatalogTracker(TEST_UTIL.getConfiguration());
+      Set<byte[]> tableRows = new HashSet<byte[]>();
+      List<HRegionInfo> hris = MetaReader.getTableRegions(ct, table);
+      for (HRegionInfo hri : hris) {
+        tableRows.add(hri.getRegionName());
+      }
+      admin.disableTable(table);
+      // now delete one replica info from all the rows
+      // this is to make the meta appear to be only partially updated
+      HTable metaTable = new HTable(TableName.META_TABLE_NAME, ct.getConnection());
+      for (byte[] row : tableRows) {
+        Delete deleteOneReplicaLocation = new Delete(row);
+        deleteOneReplicaLocation.deleteColumns(HConstants.CATALOG_FAMILY, MetaReader.getServerColumn(1));
+        deleteOneReplicaLocation.deleteColumns(HConstants.CATALOG_FAMILY, MetaReader.getSeqNumColumn(1));
+        deleteOneReplicaLocation.deleteColumns(HConstants.CATALOG_FAMILY, MetaReader.getStartCodeColumn(1));
+        metaTable.delete(deleteOneReplicaLocation);
+      }
+      metaTable.close();
+      // even if the meta table is partly updated, when we re-enable the table, we should
+      // get back the desired number of replicas for the regions
+      admin.enableTable(table);
+      assert(admin.isTableEnabled(table));
+      List<HRegionInfo> regions = TEST_UTIL.getMiniHBaseCluster().getMaster()
+          .getAssignmentManager().getRegionStates().getRegionsOfTable(table);
+      assert(regions.size() == numRegions * numReplica);
+    } finally {
+      admin.disableTable(table);
+      admin.deleteTable(table);
+    }
+  }
+
+  private String printRegions(List<HRegionInfo> regions) {
+    StringBuffer strBuf = new StringBuffer();
+    for (HRegionInfo r : regions) {
+      strBuf.append(" ____ " + r.toString());
+    }
+    return strBuf.toString();
+  }
+
+  private void validateNumberOfRowsInMeta(final TableName table, int numRegions, CatalogTracker ct)
+      throws IOException {
+    assert(admin.tableExists(table));
+    final AtomicInteger count = new AtomicInteger();
+    Visitor visitor = new Visitor() {
+      @Override
+      public boolean visit(Result r) throws IOException {
+        if (HRegionInfo.getHRegionInfo(r).getTable().equals(table)) count.incrementAndGet();
+        return true;
+      }
+    };
+    MetaReader.fullScan(ct, visitor);
+    assert(count.get() == numRegions);
+  }
+
+  private void validateFromSnapshotFromMeta(TableName table, int numRegions,
+      int numReplica, CatalogTracker ct) throws IOException {
+    SnapshotOfRegionAssignmentFromMeta snapshot = new SnapshotOfRegionAssignmentFromMeta(ct);
+    snapshot.initialize();
+    Map<HRegionInfo, ServerName> regionToServerMap = snapshot.getRegionToRegionServerMap();
+    assert(regionToServerMap.size() == numRegions * numReplica + 1); //'1' for the namespace
+    Map<ServerName, List<HRegionInfo>> serverToRegionMap = snapshot.getRegionServerToRegionMap();
+    for (Map.Entry<ServerName, List<HRegionInfo>> entry : serverToRegionMap.entrySet()) {
+      List<HRegionInfo> regions = entry.getValue();
+      Set<byte[]> setOfStartKeys = new HashSet<byte[]>();
+      for (HRegionInfo region : regions) {
+        byte[] startKey = region.getStartKey();
+        if (region.getTable().equals(table)) {
+          setOfStartKeys.add(startKey); //ignore other tables
+          LOG.info("--STARTKEY " + new String(startKey)+"--");
+        }
+      }
+      // the number of startkeys will be equal to the number of regions hosted in each server
+      // (each server will be hosting one replica of a region)
+      assertEquals(setOfStartKeys.size() , numRegions);
+    }
+  }
+
+  private void validateSingleRegionServerAssignment(CatalogTracker ct, int numRegions,
+      int numReplica) throws IOException {
+    SnapshotOfRegionAssignmentFromMeta snapshot = new SnapshotOfRegionAssignmentFromMeta(ct);
+    snapshot.initialize();
+    Map<HRegionInfo, ServerName> regionToServerMap = snapshot.getRegionToRegionServerMap();
+    assert(regionToServerMap.size() == numRegions * numReplica + 1); //'1' for the namespace
+    Map<ServerName, List<HRegionInfo>> serverToRegionMap = snapshot.getRegionServerToRegionMap();
+    assert(serverToRegionMap.keySet().size() == 1);
+    assert(serverToRegionMap.values().iterator().next().size() == numRegions * numReplica + 1);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java
index d0cf4fa..891b32c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -34,16 +35,24 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.MediumTests;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.master.LoadBalancer;
+import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
+import org.apache.hadoop.hbase.master.RackManager;
 import org.apache.hadoop.hbase.master.RegionPlan;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
 
 @Category(MediumTests.class)
 public class TestBaseLoadBalancer extends BalancerTestBase {
 
   private static LoadBalancer loadBalancer;
+  private static RackManager rackManager;
+  private static final int NUM_SERVERS = 15;
+  private static ServerName[] servers = new ServerName[NUM_SERVERS];
 
   private static final Log LOG = LogFactory.getLog(TestStochasticLoadBalancer.class);
 
   int[][] regionsAndServersMocks = new int[][] {
@@ -58,6 +67,20 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
     Configuration conf = HBaseConfiguration.create();
     loadBalancer = new MockBalancer();
     loadBalancer.setConf(conf);
+    // Set up the rack topologies (5 machines per rack)
+    rackManager = Mockito.mock(RackManager.class);
+    for (int i = 0; i < NUM_SERVERS; i++) {
+      servers[i] = ServerName.valueOf("foo"+i+":1234",-1);
+      if (i < 5) {
+        Mockito.when(rackManager.getRack(servers[i])).thenReturn("rack1");
+      }
+      if (i >= 5 && i < 10) {
+        Mockito.when(rackManager.getRack(servers[i])).thenReturn("rack2");
+      }
+      if (i >= 10) {
+        Mockito.when(rackManager.getRack(servers[i])).thenReturn("rack3");
+      }
+    }
   }
 
   public static class MockBalancer extends BaseLoadBalancer {
@@ -174,6 +197,137 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
     assertRetainedAssignment(existing, listOfServerNames, assignment);
   }
 
+  //TODO: HBASE-10351 should reenable this test
+//  @Test
+//  public void testRegionAvailability() throws Exception {
+//    // Create a cluster with a few servers, assign them to specific racks
+//    // then assign some regions. The tests should check whether moving a
+//    // replica from one node to a specific other node or rack lowers the
+//    // availability of the region or not
+//
+//    List<HRegionInfo> list0 = new ArrayList<HRegionInfo>();
+//    List<HRegionInfo> list1 = new ArrayList<HRegionInfo>();
+//    List<HRegionInfo> list2 = new ArrayList<HRegionInfo>();
+//    // create a region (region1)
+//    HRegionInfo hri1 = new HRegionInfo(
+//        TableName.valueOf("table"), "key1".getBytes(), "key2".getBytes(),
+//        false, 100);
+//    // create a replica of the region (replica_of_region1)
+//    HRegionInfo hri2 = RegionReplicaUtil.getRegionInfoForReplica(hri1, 1);
+//    // create a second region (region2)
+//    HRegionInfo hri3 = new HRegionInfo(
+//        TableName.valueOf("table"), "key2".getBytes(), "key3".getBytes(),
+//        false, 101);
+//    list0.add(hri1); //only region1
+//    list1.add(hri2); //only replica_of_region1
+//    list2.add(hri3); //only region2
+//    Map<ServerName, List<HRegionInfo>> clusterState =
+//        new LinkedHashMap<ServerName, List<HRegionInfo>>();
+//    clusterState.put(servers[0], list0); //servers[0] hosts region1
+//    clusterState.put(servers[1], list1); //servers[1] hosts replica_of_region1
+//    clusterState.put(servers[2], list2); //servers[2] hosts region2
+//    // create a cluster with the above clusterState. The way in which the
+//    // cluster is created (constructor code) would make sure the indices of
+//    // the servers are in the order in which it is inserted in the clusterState
+//    // map (linkedhashmap is important). A similar thing applies to the region lists
+//    Cluster cluster = new Cluster(clusterState, null, null, rackManager);
+//    // check whether a move of region1 from servers[0] to servers[1] would lower
+//    // the availability of region1
+//    assertTrue(cluster.wouldLowerAvailability(1, 0));
+//    // check whether a move of region1 from servers[0] to servers[2] would lower
+//    // the availability of region1
+//    assertTrue(!cluster.wouldLowerAvailability(2, 0));
+//    // check whether a move of replica_of_region1 from servers[0] to servers[2] would lower
+//    // the availability of replica_of_region1
+//    assertTrue(!cluster.wouldLowerAvailability(2, 1));
+//    // check whether a move of region2 from servers[0] to servers[1] would lower
+//    // the availability of region2
+//    assertTrue(!cluster.wouldLowerAvailability(1, 2));
+//
+//    // now lets have servers[1] host replica_of_region2
+//    list1.add(RegionReplicaUtil.getRegionInfoForReplica(hri3, 1));
+//    // create a new clusterState with the above change
+//    cluster = new Cluster(clusterState, null, null, rackManager);
+//    // now check whether a move of a replica from servers[0] to servers[1] would lower
+//    // the availability of region2
+//    assertTrue(cluster.wouldLowerAvailability(1, 2));
+//
+//    // start over again
+//    clusterState.clear();
+//    clusterState.put(servers[0], list0); //servers[0], rack1 hosts region1
+//    clusterState.put(servers[5], list1); //servers[5], rack2 hosts replica_of_region1 and region2
+//    clusterState.put(servers[6], list2); //servers[6], rack2 hosts region2
+//    // create a cluster with the above clusterState
+//    cluster = new Cluster(clusterState, null, null, rackManager);
+//    // check whether a move of region1 from servers[0],rack1 to servers[6],rack2 would
+//    // lower the availability
+//    assertTrue(cluster.wouldLowerAvailability(2, 0));
+//
+//    // now create a cluster without the rack manager
+//    cluster = new Cluster(clusterState, null, null, null);
+//    // now repeat check whether a move of region1 from servers[0] to servers[6] would
+//    // lower the availability
+//    assertTrue(!cluster.wouldLowerAvailability(2, 0));
+//  }
+
+  //TODO: HBASE-10351 should reenable this test
+//  @Test
+//  public void testRegionAvailabilityWithRegionMoves() throws Exception {
+//    List<HRegionInfo> list0 = new ArrayList<HRegionInfo>();
+//    List<HRegionInfo> list1 = new ArrayList<HRegionInfo>();
+//    List<HRegionInfo> list2 = new ArrayList<HRegionInfo>();
+//    // create a region (region1)
+//    HRegionInfo hri1 = new HRegionInfo(
+//        TableName.valueOf("table"), "key1".getBytes(), "key2".getBytes(),
+//        false, 100);
+//    // create a replica of the region (replica_of_region1)
+//    HRegionInfo hri2 = RegionReplicaUtil.getRegionInfoForReplica(hri1, 1);
+//    // create a second region (region2)
+//    HRegionInfo hri3 = new HRegionInfo(
+//        TableName.valueOf("table"), "key2".getBytes(), "key3".getBytes(),
+//        false, 101);
+//    list0.add(hri1); //only region1
+//    list1.add(hri2); //only replica_of_region1
+//    list2.add(hri3); //only region2
+//    Map<ServerName, List<HRegionInfo>> clusterState =
+//        new LinkedHashMap<ServerName, List<HRegionInfo>>();
+//    clusterState.put(servers[0], list0); //servers[0] hosts region1
+//    clusterState.put(servers[1], list1); //servers[1] hosts replica_of_region1
+//    clusterState.put(servers[2], list2); //servers[2] hosts region2
+//    // create a cluster with the above clusterState. The way in which the
+//    // cluster is created (constructor code) would make sure the indices of
+//    // the servers are in the order in which it is inserted in the clusterState
+//    // map (linkedhashmap is important).
+//    Cluster cluster = new Cluster(clusterState, null, null, rackManager);
+//    // check whether moving region1 from servers[1] to servers[2] would lower availability
+//    assertTrue(!cluster.wouldLowerAvailability(2, 0));
+//
+//    // now move region1 from servers[0] to servers[2]
+//    cluster.updateReplicaMap(0, 2, 0);
+//    // now repeat check whether moving region1 from servers[1] to servers[2]
+//    // would lower availability
+//    assertTrue(cluster.wouldLowerAvailability(2, 0));
+//
+//    // start over again
+//    clusterState.clear();
+//    List<HRegionInfo> list3 = new ArrayList<HRegionInfo>();
+//    list3.add(RegionReplicaUtil.getRegionInfoForReplica(hri3, 1));
+//    clusterState.put(servers[0], list0); //servers[0], rack1 hosts region1
+//    clusterState.put(servers[5], list1); //servers[5], rack2 hosts replica_of_region1
+//    clusterState.put(servers[6], list2); //servers[6], rack2 hosts region2
+//    clusterState.put(servers[12], list3); //servers[12], rack3 hosts replica_of_region2
+//    // create a cluster with the above clusterState
+//    cluster = new Cluster(clusterState, null, null, rackManager);
+//    // check whether a move of replica_of_region2 from servers[12],rack3 to servers[0],rack1 would
+//    // lower the availability
+//    assertTrue(!cluster.wouldLowerAvailability(0, 3));
+//    // now move region2 from servers[6],rack2 to servers[0],rack1
+//    cluster.updateReplicaMap(2, 0, 2);
+//    // now repeat check if replica_of_region2 from servers[12],rack3 to servers[0],rack1 would
+//    // lower the availability
+//    assertTrue(cluster.wouldLowerAvailability(0, 3));
+//  }
+
   private List<ServerName> getListOfServerNames(final List<ServerAndLoad> sals) {
     List<ServerName> list = new ArrayList<ServerName>();
     for (ServerAndLoad e : sals) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
index 162a257..a66a60e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java
@@ -36,6 +36,8 @@ import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.master.RegionPlan;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.net.DNSToSwitchMapping;
+import org.apache.hadoop.net.NetworkTopology;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -57,6 +59,8 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
   public static void beforeAllTests() throws Exception {
     Configuration conf = HBaseConfiguration.create();
     conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f);
+    conf.setClass("hbase.util.ip.to.rack.determiner",
+        MyRackResolver.class, DNSToSwitchMapping.class);
     loadBalancer = new StochasticLoadBalancer();
     loadBalancer.setConf(conf);
   }
@@ -386,4 +390,22 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
     cluster[cluster.length - 1] = numRegions - ((cluster.length - 1) * numRegionsPerServer);
     return mockClusterServers(cluster, numTables);
   }
+
+
+  public static class MyRackResolver implements DNSToSwitchMapping {
+
+    public MyRackResolver(Configuration conf) {}
+
+    @Override
+    public List<String> resolve(List<String> names) {
+      List<String> racks = new ArrayList<String>(names.size());
+      for (int i = 0; i < names.size(); i++) {
+        racks.add(i, NetworkTopology.DEFAULT_RACK);
+      }
+      return racks;
+    }
+
+    @Override
+    public void reloadCachedMappings() {}
+  }
 }