diff --git a/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java b/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java index c787708..cbceff6 100644 --- a/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java +++ b/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java @@ -65,7 +65,7 @@ import org.apache.hadoop.util.ReflectionUtils; public class HBaseClient { public static final Log LOG = - LogFactory.getLog("org.apache.hadoop.ipc.HBaseClass"); + LogFactory.getLog("org.apache.hadoop.ipc.HBaseClient"); protected Hashtable connections = new Hashtable(); diff --git a/src/java/org/apache/hadoop/hbase/master/HMaster.java b/src/java/org/apache/hadoop/hbase/master/HMaster.java index 030829c..3b35a78 100644 --- a/src/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/java/org/apache/hadoop/hbase/master/HMaster.java @@ -549,7 +549,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, else if(region.isMetaRegion()) { MetaRegion m = new MetaRegion(new HServerAddress(address), - region.getRegionName(), region.getStartKey()); + region); regionManager.addMetaRegionToScan(m); } assignedRegions.put(region.getRegionName(), region); diff --git a/src/java/org/apache/hadoop/hbase/master/MetaRegion.java b/src/java/org/apache/hadoop/hbase/master/MetaRegion.java index 24f57b0..7a2aa35 100644 --- a/src/java/org/apache/hadoop/hbase/master/MetaRegion.java +++ b/src/java/org/apache/hadoop/hbase/master/MetaRegion.java @@ -23,42 +23,36 @@ import java.util.Arrays; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.util.Bytes; /** Describes a meta region and its server */ public class MetaRegion implements Comparable { private final HServerAddress server; - private final byte [] regionName; - private final byte [] startKey; + private HRegionInfo regionInfo; - MetaRegion(final HServerAddress server, final byte [] regionName) { - this (server, 
regionName, HConstants.EMPTY_START_ROW); - } - - MetaRegion(final HServerAddress server, final byte [] regionName, - final byte [] startKey) { + MetaRegion(final HServerAddress server, HRegionInfo regionInfo) { if (server == null) { throw new IllegalArgumentException("server cannot be null"); } this.server = server; - if (regionName == null) { - throw new IllegalArgumentException("regionName cannot be null"); + if (regionInfo == null) { + throw new IllegalArgumentException("regionInfo cannot be null"); } - this.regionName = regionName; - this.startKey = startKey; + this.regionInfo = regionInfo; } @Override public String toString() { - return "{regionname: " + Bytes.toString(this.regionName) + - ", startKey: <" + Bytes.toString(this.startKey) + - ">, server: " + this.server.toString() + "}"; + return "{server: " + this.server.toString() + ", regionname: " + + regionInfo.getRegionNameAsString() + ", startKey: <" + + Bytes.toString(regionInfo.getStartKey()) + ">}"; } /** @return the regionName */ public byte [] getRegionName() { - return regionName; + return regionInfo.getRegionName(); } /** @return the server */ @@ -68,7 +62,11 @@ public class MetaRegion implements Comparable { /** @return the startKey */ public byte [] getStartKey() { - return startKey; + return regionInfo.getStartKey(); + } + + public HRegionInfo getRegionInfo() { + return regionInfo; } @Override @@ -78,22 +76,17 @@ public class MetaRegion implements Comparable { @Override public int hashCode() { - int result = Arrays.hashCode(this.regionName); - result ^= Arrays.hashCode(this.startKey); - return result; + return regionInfo.hashCode(); } // Comparable public int compareTo(MetaRegion other) { - int result = Bytes.compareTo(this.regionName, other.getRegionName()); - if(result == 0) { - result = Bytes.compareTo(this.startKey, other.getStartKey()); - if (result == 0) { - // Might be on different host? 
- result = this.server.compareTo(other.server); - } + int cmp = regionInfo.compareTo(other.regionInfo); + if(cmp == 0) { + // Might be on different host? + cmp = this.server.compareTo(other.server); } - return result; + return cmp; } } \ No newline at end of file diff --git a/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java b/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java index 5ebca62..e096dd2 100644 --- a/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java +++ b/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java @@ -95,7 +95,7 @@ class ProcessRegionOpen extends ProcessRegionStatusChange { // It's a meta region. MetaRegion m = new MetaRegion(new HServerAddress(serverInfo.getServerAddress()), - regionInfo.getRegionName(), regionInfo.getStartKey()); + regionInfo); if (!master.regionManager.isInitialMetaScanComplete()) { // Put it on the queue to be scanned for the first time. if (LOG.isDebugEnabled()) { diff --git a/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java b/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java index f409b36..5c3d215 100644 --- a/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java +++ b/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java @@ -31,7 +31,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation { protected final HRegionInfo regionInfo; private volatile MetaRegion metaRegion = null; protected volatile byte[] metaRegionName = null; - + /** * @param master * @param regionInfo @@ -67,7 +67,7 @@ abstract class ProcessRegionStatusChange extends RegionServerOperation { if (isMetaTable) { this.metaRegionName = HRegionInfo.ROOT_REGIONINFO.getRegionName(); this.metaRegion = new MetaRegion(master.getRootRegionLocation(), - this.metaRegionName, HConstants.EMPTY_START_ROW); + HRegionInfo.ROOT_REGIONINFO); } else { this.metaRegion = master.regionManager.getFirstMetaRegionForRegion(regionInfo); 
diff --git a/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java b/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java index 8c5d793..5b035b2 100644 --- a/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java +++ b/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.regionserver.HLog; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -44,12 +45,14 @@ import org.apache.hadoop.hbase.io.RowResult; */ class ProcessServerShutdown extends RegionServerOperation { private final String deadServer; - private final boolean rootRegionServer; - private boolean rootRegionReassigned = false; + private boolean isRootServer; + private List metaRegions; + private Path oldLogDir; private boolean logSplit; private boolean rootRescanned; - + private HServerAddress deadServerAddress; + private static class ToDoEntry { boolean regionOffline; @@ -66,17 +69,33 @@ class ProcessServerShutdown extends RegionServerOperation { /** * @param master * @param serverInfo - * @param rootRegionServer */ - public ProcessServerShutdown(HMaster master, HServerInfo serverInfo, - boolean rootRegionServer) { + public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) { super(master); this.deadServer = HServerInfo.getServerName(serverInfo); - this.rootRegionServer = rootRegionServer; + this.deadServerAddress = serverInfo.getServerAddress(); this.logSplit = false; this.rootRescanned = false; this.oldLogDir = new Path(master.rootdir, HLog.getHLogDirectoryName(serverInfo)); + + // check to see if I am responsible for either ROOT or any of the META tables. 
+ + closeMetaRegions(); + } + + private void closeMetaRegions() { + isRootServer = master.regionManager.isRootServer(deadServerAddress); + if (isRootServer) { + master.regionManager.unsetRootRegion(); + } + List metaStarts = master.regionManager.isMetaServer(deadServerAddress); + + metaRegions = new ArrayList(); + for (byte [] region : metaStarts) { + MetaRegion r = master.regionManager.offlineMetaRegion(region); + metaRegions.add(r); + } + } @Override @@ -254,16 +273,22 @@ class ProcessServerShutdown extends RegionServerOperation { logSplit = true; } - if (this.rootRegionServer && !this.rootRegionReassigned) { - // avoid multiple root region reassignment - this.rootRegionReassigned = true; - // The server that died was serving the root region. Now that the log - // has been split, get it reassigned. + LOG.info("Log split complete, meta reassignment and scanning:"); + + if (this.isRootServer) { + LOG.info("ProcessServerShutdown reassigning ROOT region"); master.regionManager.reassignRootRegion(); - // When we call rootAvailable below, it will put us on the delayed - // to do queue to allow some time to pass during which the root - // region will hopefully get reassigned. + + isRootServer = false; // prevent double reassignment... heh. + } + + for (MetaRegion metaRegion : metaRegions) { + LOG.info("ProcessServerShutdown setting to unassigned: " + metaRegion.toString()); + master.regionManager.setUnassigned(metaRegion.getRegionInfo(), true); } + // once the meta regions are online, "forget" about them. Since there are explicit + // checks below to make sure meta/root are online, this is likely to occur. 
+ metaRegions.clear(); if (!rootAvailable()) { // Return true so that worker does not put this request back on the @@ -276,8 +301,7 @@ class ProcessServerShutdown extends RegionServerOperation { // Scan the ROOT region Boolean result = new ScanRootRegion( new MetaRegion(master.getRootRegionLocation(), - HRegionInfo.ROOT_REGIONINFO.getRegionName(), - HConstants.EMPTY_START_ROW), this.master).doWithRetries(); + HRegionInfo.ROOT_REGIONINFO), this.master).doWithRetries(); if (result == null) { // Master is closing - give up return true; diff --git a/src/java/org/apache/hadoop/hbase/master/RegionManager.java b/src/java/org/apache/hadoop/hbase/master/RegionManager.java index 0881d57..5fa64f5 100644 --- a/src/java/org/apache/hadoop/hbase/master/RegionManager.java +++ b/src/java/org/apache/hadoop/hbase/master/RegionManager.java @@ -96,7 +96,7 @@ class RegionManager implements HConstants { * * @see RegionState inner-class below */ - private final SortedMap regionsInTransition = + final SortedMap regionsInTransition = Collections.synchronizedSortedMap(new TreeMap()); // How many regions to assign a server at a time. @@ -164,6 +164,7 @@ class RegionManager implements HConstants { rootRegionLocation.set(null); regionsInTransition.remove( HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString()); + LOG.info("-ROOT- region unset (but not set to be reassigned)"); } } @@ -175,6 +176,7 @@ class RegionManager implements HConstants { s.setUnassigned(); regionsInTransition.put( HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString(), s); + LOG.info("ROOT inserted into regionsInTransition"); } } } @@ -208,7 +210,7 @@ class RegionManager implements HConstants { } else { // otherwise, give this server a few regions taking into account the // load of all the other servers. 
- assignRegionsToMultipleServers(thisServersLoad, regionsToAssign, + assignRegionsToMultipleServers(thisServersLoad, regionsToAssign, info, returnMsgs); } } @@ -306,8 +308,32 @@ class RegionManager implements HConstants { LOG.info("Assigning region " + regionName + " to " + sinfo.getServerName()); rs.setPendingOpen(sinfo.getServerName()); this.regionsInTransition.put(regionName, rs); - this.historian.addRegionAssignment(rs.getRegionInfo(), - sinfo.getServerName()); + + // Since the meta/root may not be available at this moment, we + try { + // TODO move this into an actual class, and use the RetryableMetaOperation + master.toDoQueue.put( + new RegionServerOperation(master) { + protected boolean process() throws IOException { + if (!rootAvailable() || !metaTableAvailable()) { + return true; // the two above us will put us on the delayed queue + } + + // this call can cause problems if meta/root is offline! + historian.addRegionAssignment(rs.getRegionInfo(), + sinfo.getServerName()); + return true; + } + public String toString() { + return "RegionAssignmentHistorian from " + sinfo.getServerName(); + } + } + ); + } catch (InterruptedException e) { + // ignore and don't write the region historian + LOG.info("doRegionAssignment: Couldn't queue the region historian due to exception: " + e); + } + returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo())); } @@ -358,8 +384,14 @@ class RegionManager implements HConstants { private Set regionsAwaitingAssignment() { // set of regions we want to assign to this server Set regionsToAssign = new HashSet(); - - // Look over the set of regions that aren't currently assigned to + + // Handle if root is unassigned... only assign root if root is offline. 
+ RegionState rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString()); + if (rootState != null && rootState.isUnassigned()) { + regionsToAssign.add(rootState); + return regionsToAssign; + } + // Look over the set of regions that aren't currently assigned to // determine which we should assign to this server. for (RegionState s: regionsInTransition.values()) { HRegionInfo i = s.getRegionInfo(); @@ -607,7 +639,7 @@ class RegionManager implements HConstants { Bytes.toString(HConstants.ROOT_TABLE_NAME)); } metaRegions.add(new MetaRegion(rootRegionLocation.get(), - HRegionInfo.ROOT_REGIONINFO.getRegionName())); + HRegionInfo.ROOT_REGIONINFO)); } else { if (!areAllMetaRegionsOnline()) { throw new NotAllMetaRegionsOnlineException(); @@ -712,11 +744,76 @@ class RegionManager implements HConstants { /** * Set an online MetaRegion offline - remove it from the map. * @param startKey region name + * @return the MetaRegion that was taken offline. */ - public void offlineMetaRegion(byte [] startKey) { - onlineMetaRegions.remove(startKey); + public MetaRegion offlineMetaRegion(byte [] startKey) { + LOG.info("META region removed from onlineMetaRegions"); + return onlineMetaRegions.remove(startKey); } - + + public boolean isRootServer(HServerAddress server) { + if (master.getRootRegionLocation() != null + && server.equals(master.getRootRegionLocation())) + return true; + return false; + } + + /** + * Returns the list of byte[] start-keys for any .META. regions hosted + * on the indicated server. + * + * @param server server address + * @return list of meta region start-keys. + */ + public List isMetaServer(HServerAddress server) { + List metas = new ArrayList(); + + for ( MetaRegion region : onlineMetaRegions.values() ) { + if (server.equals(region.getServer())) { + metas.add(region.getStartKey()); + } + } + + return metas; + } + + /** + * Call to take this metaserver offline for immediate reassignment. 
Used only + * when we know a region has shut down cleanly. + * + * A meta server is a server that hosts either -ROOT- or any .META. regions. + * + * If the shutdown is potentially unclean, use ProcessServerShutdown which + * calls other methods to immediately unassign root/meta but delay the reassign until the + * log has been split. + * + * @param server the server that went down + * @return true if this was in fact a meta server, false if it did not carry meta regions. + */ + public synchronized boolean offlineMetaServer(HServerAddress server) { + boolean hasMeta = false; + + // check to see if ROOT and/or .META. are on this server, reassign them. + // use master.getRootRegionLocation. + if (master.getRootRegionLocation() != null && + server.equals(master.getRootRegionLocation())) { + LOG.info("Offlined ROOT server: " + server); + reassignRootRegion(); + hasMeta = true; + } + // AND + for ( MetaRegion region : onlineMetaRegions.values() ) { + if (server.equals(region.getServer())) { + LOG.info("Offlining META region: " + region); + // now reassign this. + offlineMetaRegion(region.getStartKey()); + setUnassigned(region.getRegionInfo(), true); + hasMeta = true; + } + } + return hasMeta; + } + /** * Remove a region from the region state map. 
* diff --git a/src/java/org/apache/hadoop/hbase/master/RootScanner.java b/src/java/org/apache/hadoop/hbase/master/RootScanner.java index 2bdeefa..8b1bdc2 100644 --- a/src/java/org/apache/hadoop/hbase/master/RootScanner.java +++ b/src/java/org/apache/hadoop/hbase/master/RootScanner.java @@ -52,7 +52,7 @@ class RootScanner extends BaseScanner { synchronized(scannerLock) { if (master.getRootRegionLocation() != null) { scanRegion(new MetaRegion(master.getRootRegionLocation(), - HRegionInfo.ROOT_REGIONINFO.getRegionName())); + HRegionInfo.ROOT_REGIONINFO)); } } } catch (IOException e) { diff --git a/src/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/java/org/apache/hadoop/hbase/master/ServerManager.java index 7e4e996..69bc32c 100644 --- a/src/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -184,14 +184,9 @@ class ServerManager implements HConstants { // The startup message was from a known server with the same name. // Timeout the old one right away. HServerAddress root = master.getRootRegionLocation(); - boolean rootServer = false; - if (root != null && root.equals(storedInfo.getServerAddress())) { - master.regionManager.unsetRootRegion(); - rootServer = true; - } try { master.toDoQueue.put( - new ProcessServerShutdown(master, storedInfo, rootServer)); + new ProcessServerShutdown(master, storedInfo)); } catch (InterruptedException e) { LOG.error("Insertion into toDoQueue was interrupted", e); } @@ -321,7 +316,8 @@ class ServerManager implements HConstants { private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) { synchronized (serversToServerInfo) { try { - // HRegionServer is shutting down. + // This method removes ROOT/META from the list and marks them to be reassigned + // in addition to other housework. if (removeServerInfo(serverInfo.getServerName(), serverInfo.getServerAddress())) { // Only process the exit message if the server still has registered info. 
@@ -335,13 +331,9 @@ class ServerManager implements HConstants { LOG.info("Processing " + msgs[i] + " from " + serverInfo.getServerName()); HRegionInfo info = msgs[i].getRegionInfo(); - synchronized (master.regionManager) { - if (info.isRootRegion()) { - master.regionManager.reassignRootRegion(); - } else { - if (info.isMetaTable()) { - master.regionManager.offlineMetaRegion(info.getStartKey()); - } + // Meta/root region offlining is handled in removeServerInfo above. + if (!info.isMetaRegion()) { + synchronized (master.regionManager) { if (!master.regionManager.isOfflined( info.getRegionNameAsString())) { master.regionManager.setUnassigned(info, true); @@ -467,14 +459,16 @@ class ServerManager implements HConstants { master.regionManager.setPendingClose(i.getRegionNameAsString()); } + // Figure out what the RegionServer ought to do, and write back. // Should we tell it close regions because its overloaded? If its // currently opening regions, leave it alone till all are open. - if (openingCount < this.nobalancingCount) { + if ((openingCount < this.nobalancingCount)) { this.master.regionManager.assignRegions(serverInfo, mostLoadedRegions, returnMsgs); } + // Send any pending table actions. this.master.regionManager.applyActions(serverInfo, returnMsgs); } @@ -644,10 +638,8 @@ class ServerManager implements HConstants { // This method can be called a couple of times during shutdown. 
if (info != null) { LOG.info("Removing server's info " + serverName); - if (master.getRootRegionLocation() != null && - info.getServerAddress().equals(master.getRootRegionLocation())) { - master.regionManager.unsetRootRegion(); - } + master.regionManager.offlineMetaServer(info.getServerAddress()); + infoUpdated = true; // update load information @@ -785,17 +777,7 @@ class ServerManager implements HConstants { // Remove the server from the known servers list and update load info serverAddressToServerInfo.remove(serverAddress); HServerInfo info = serversToServerInfo.remove(server); - boolean rootServer = false; if (info != null) { - HServerAddress root = master.getRootRegionLocation(); - if (root != null && root.equals(info.getServerAddress())) { - // NOTE: If the server was serving the root region, we cannot - // reassign - // it here because the new server will start serving the root region - // before ProcessServerShutdown has a chance to split the log file. - master.regionManager.unsetRootRegion(); - rootServer = true; - } String serverName = HServerInfo.getServerName(info); HServerLoad load = serversToLoad.remove(serverName); if (load != null) { @@ -812,8 +794,7 @@ class ServerManager implements HConstants { } deadServers.add(server); try { - master.toDoQueue.put(new ProcessServerShutdown(master, info, - rootServer)); + master.toDoQueue.put(new ProcessServerShutdown(master, info)); } catch (InterruptedException e) { LOG.error("insert into toDoQueue was interrupted", e); }