diff --git src/java/org/apache/hadoop/hbase/master/HMaster.java src/java/org/apache/hadoop/hbase/master/HMaster.java index a60e580..b8cd322 100644 --- src/java/org/apache/hadoop/hbase/master/HMaster.java +++ src/java/org/apache/hadoop/hbase/master/HMaster.java @@ -36,6 +36,8 @@ import java.util.concurrent.DelayQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -160,6 +162,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, RegionManager regionManager; private MasterMetrics metrics; + final Lock splitLogLock = new ReentrantLock(); /** * Build the HMaster out of a raw configuration item. @@ -611,14 +614,14 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, if(this.serverManager.getServerInfo(serverName) == null) { LOG.info("Log folder doesn't belong " + "to a known region server, splitting"); - this.regionManager.splitLogLock.lock(); + this.splitLogLock.lock(); Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName)); try { HLog.splitLog(this.rootdir, logDir, this.fs, getConfiguration()); } finally { - this.regionManager.splitLogLock.unlock(); + this.splitLogLock.unlock(); } } else { LOG.info("Log folder belongs to an existing region server"); diff --git src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java index 7a01ce3..3888985 100644 --- src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java +++ src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.hadoop.fs.Path; @@ -39,6 +40,8 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.io.RowResult; +import org.apache.hadoop.hbase.master.RegionManager.RegionState; + /** * Instantiated when a server's lease has expired, meaning it has crashed. @@ -83,6 +86,7 @@ class ProcessServerShutdown extends RegionServerOperation { // check to see if I am responsible for either ROOT or any of the META tables. + // TODO Why do we do this now instead of at processing time? closeMetaRegions(); } @@ -112,6 +116,19 @@ class ProcessServerShutdown extends RegionServerOperation { } } + private void closeRegionsInTransition() { + Map inTransition = + master.regionManager.getRegionsInTransitionOnServer(deadServer); + for (Map.Entry entry : inTransition.entrySet()) { + String regionName = entry.getKey(); + RegionState state = entry.getValue(); + + LOG.info("Region " + regionName + " was in transition " + + state + " on dead server " + deadServer + " - marking unassigned"); + master.regionManager.setUnassigned(state.getRegionInfo(), true); + } + } + @Override public String toString() { return "ProcessServerShutdown of " + this.deadServer; @@ -279,14 +296,14 @@ class ProcessServerShutdown extends RegionServerOperation { if (!logSplit) { // Process the old log file if (master.fs.exists(oldLogDir)) { - if (!master.regionManager.splitLogLock.tryLock()) { + if (!master.splitLogLock.tryLock()) { return false; } try { HLog.splitLog(master.rootdir, oldLogDir, master.fs, master.getConfiguration()); } finally { - master.regionManager.splitLogLock.unlock(); + master.splitLogLock.unlock(); } } logSplit = true; @@ -351,6 +368,9 @@ class ProcessServerShutdown extends RegionServerOperation { Bytes.toString(r.getRegionName()) + " on " + r.getServer()); } } + + closeRegionsInTransition(); + // Remove this server from dead servers list. Finished splitting logs. this.master.serverManager.removeDeadServer(deadServer); if (LOG.isDebugEnabled()) { diff --git src/java/org/apache/hadoop/hbase/master/RegionManager.java src/java/org/apache/hadoop/hbase/master/RegionManager.java index a3d6563..3d5c99e 100644 --- src/java/org/apache/hadoop/hbase/master/RegionManager.java +++ src/java/org/apache/hadoop/hbase/master/RegionManager.java @@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -33,8 +34,6 @@ import java.util.TreeMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -68,8 +67,6 @@ class RegionManager implements HConstants { private volatile boolean safeMode = true; - final Lock splitLogLock = new ReentrantLock(); - private final RootScanner rootScannerThread; final MetaScanner metaScannerThread; @@ -168,8 +165,8 @@ class RegionManager implements HConstants { unsetRootRegion(); if (!master.shutdownRequested.get()) { synchronized (regionsInTransition) { - RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO); - s.setUnassigned(); + RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO, + RegionState.State.UNASSIGNED); regionsInTransition.put( HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString(), s); LOG.info("ROOT inserted into regionsInTransition"); @@ -587,6 +584,23 @@ class RegionManager implements HConstants { } return false; } + + /** + * Return a map of the regions in transition on a server. + * Returned map entries are region name -> RegionState + */ + Map getRegionsInTransitionOnServer(String serverName) { + Map ret = new HashMap(); + synchronized (regionsInTransition) { + for (Map.Entry entry : regionsInTransition.entrySet()) { + RegionState rs = entry.getValue(); + if (serverName.equals(rs.getServerName())) { + ret.put(entry.getKey(), rs); + } + } + } + return ret; + } /** * Stop the root and meta scanners so that the region servers serving meta @@ -736,8 +750,7 @@ class RegionManager implements HConstants { byte [] regionName = region.getRegionName(); Put put = new Put(regionName); - byte [] infoBytes = Writables.getBytes(info); - String infoString = new String(infoBytes); + put.add(CATALOG_FAMILY, REGIONINFO_QUALIFIER, Writables.getBytes(info)); server.put(metaRegionName, put); @@ -844,6 +857,10 @@ class RegionManager implements HConstants { && !s.isUnassigned() && s.getServerName() != null && s.getServerName().equals(server.toString())) { + // TODO this code appears to be entirely broken, since + // server.toString() has no start code, but s.getServerName() + // does! + LOG.fatal("I DONT BELIEVE YOU WILL EVER SEE THIS!"); // Has an outstanding meta region to be assigned. return true; } @@ -976,7 +993,7 @@ class RegionManager implements HConstants { synchronized (this.regionsInTransition) { s = regionsInTransition.get(info.getRegionNameAsString()); if (s == null) { - s = new RegionState(info); + s = new RegionState(info, RegionState.State.UNASSIGNED); regionsInTransition.put(info.getRegionNameAsString(), s); } } @@ -1058,7 +1075,7 @@ class RegionManager implements HConstants { RegionState s = this.regionsInTransition.get(regionInfo.getRegionNameAsString()); if (s == null) { - s = new RegionState(regionInfo); + s = new RegionState(regionInfo, RegionState.State.CLOSING); } // If region was asked to open before getting here, we could be taking // the wrong server name @@ -1530,22 +1547,30 @@ class RegionManager implements HConstants { * note on regionsInTransition data member above for listing of state * transitions. */ - private static class RegionState implements Comparable { + static class RegionState implements Comparable { private final HRegionInfo regionInfo; - private volatile boolean unassigned = false; - private volatile boolean pendingOpen = false; - private volatile boolean open = false; - private volatile boolean closing = false; - private volatile boolean pendingClose = false; - private volatile boolean closed = false; - private volatile boolean offlined = false; + + enum State { + UNASSIGNED, // awaiting a server to be assigned + PENDING_OPEN, // told a server to open, hasn't opened yet + OPEN, // has been opened on RS, but not yet marked in META/ROOT + CLOSING, // a msg has been enqueued to close ths region, but not delivered to RS yet + PENDING_CLOSE, // msg has been delivered to RS to close this region + CLOSED // region has been closed but not yet marked in meta + + } + + private State state; + + private boolean isOfflined; /* Set when region is assigned or closing */ - private volatile String serverName = null; + private String serverName = null; /* Constructor */ - RegionState(HRegionInfo info) { + RegionState(HRegionInfo info, State state) { this.regionInfo = info; + this.state = state; } synchronized HRegionInfo getRegionInfo() { @@ -1567,14 +1592,16 @@ class RegionManager implements HConstants { * @return true if the region is being opened */ synchronized boolean isOpening() { - return this.unassigned || this.pendingOpen || this.open; + return state == State.UNASSIGNED || + state == State.PENDING_OPEN || + state == State.OPEN; } /* * @return true if region is unassigned */ synchronized boolean isUnassigned() { - return unassigned; + return state == State.UNASSIGNED; } /* @@ -1583,120 +1610,84 @@ class RegionManager implements HConstants { * called unless it is safe to do so. */ synchronized void setUnassigned() { - this.unassigned = true; - this.pendingOpen = false; - this.open = false; - this.closing = false; - this.pendingClose = false; - this.closed = false; - this.offlined = false; + state = State.UNASSIGNED; this.serverName = null; } synchronized boolean isPendingOpen() { - return pendingOpen; + return state == State.PENDING_OPEN; } /* * @param serverName Server region was assigned to. */ synchronized void setPendingOpen(final String serverName) { - if (!this.unassigned) { + if (state != State.UNASSIGNED) { LOG.warn("Cannot assign a region that is not currently unassigned. " + "FIX!! State: " + toString()); } - this.unassigned = false; - this.pendingOpen = true; - this.open = false; - this.closing = false; - this.pendingClose = false; - this.closed = false; - this.offlined = false; + state = State.PENDING_OPEN; this.serverName = serverName; } synchronized boolean isOpen() { - return open; + return state == State.OPEN; } synchronized void setOpen() { - if (!pendingOpen) { + if (state != State.PENDING_OPEN) { LOG.warn("Cannot set a region as open if it has not been pending. " + "FIX!! State: " + toString()); } - this.unassigned = false; - this.pendingOpen = false; - this.open = true; - this.closing = false; - this.pendingClose = false; - this.closed = false; - this.offlined = false; + state = State.OPEN; } synchronized boolean isClosing() { - return closing; + return state == State.CLOSING; } synchronized void setClosing(String serverName, boolean setOffline) { - this.unassigned = false; - this.pendingOpen = false; - this.open = false; - this.closing = true; - this.pendingClose = false; - this.closed = false; - this.offlined = setOffline; + state = State.CLOSING; this.serverName = serverName; + this.isOfflined = setOffline; } synchronized boolean isPendingClose() { - return this.pendingClose; + return state == State.PENDING_CLOSE; } synchronized void setPendingClose() { - if (!closing) { + if (state != State.CLOSING) { LOG.warn("Cannot set a region as pending close if it has not been " + "closing. FIX!! State: " + toString()); } - this.unassigned = false; - this.pendingOpen = false; - this.open = false; - this.closing = false; - this.pendingClose = true; - this.closed = false; + state = State.PENDING_CLOSE; } synchronized boolean isClosed() { - return this.closed; + return state == State.CLOSED; } synchronized void setClosed() { - if (!pendingClose && !pendingOpen && !closing) { + if (state != State.PENDING_CLOSE && + state != State.PENDING_OPEN && + state != State.CLOSING) { throw new IllegalStateException( "Cannot set a region to be closed if it was not already marked as" + - " pending close, pending open or closing. State: " + toString()); + " pending close, pending open or closing. State: " + this); } - this.unassigned = false; - this.pendingOpen = false; - this.open = false; - this.closing = false; - this.pendingClose = false; - this.closed = true; + state = State.CLOSED; } synchronized boolean isOfflined() { - return this.offlined; + return (state == State.CLOSING || + state == State.PENDING_CLOSE) && isOfflined; } @Override public synchronized String toString() { return ("name=" + Bytes.toString(getRegionName()) + - ", unassigned=" + this.unassigned + - ", pendingOpen=" + this.pendingOpen + - ", open=" + this.open + - ", closing=" + this.closing + - ", pendingClose=" + this.pendingClose + - ", closed=" + this.closed + - ", offlined=" + this.offlined); + ", state=" + this.state); } @Override diff --git src/java/org/apache/hadoop/hbase/master/ServerManager.java src/java/org/apache/hadoop/hbase/master/ServerManager.java index a7f907b..4f78574 100644 --- src/java/org/apache/hadoop/hbase/master/ServerManager.java +++ src/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.HMsg.Type; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.master.RegionManager.RegionState; import org.apache.hadoop.hbase.util.Bytes; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -324,8 +325,15 @@ class ServerManager implements HConstants { } } - /** Region server is exiting */ + /** + * Region server is exiting with a clean shutdown. + * + * In this case, the server sends MSG_REPORT_EXITING in msgs[0] followed by + * a MSG_REPORT_CLOSE for each region it was serving. + */ private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) { + assert msgs[0].getType() == Type.MSG_REPORT_EXITING; + synchronized (serversToServerInfo) { try { // This method removes ROOT/META from the list and marks them to be reassigned @@ -342,6 +350,7 @@ class ServerManager implements HConstants { for (int i = 1; i < msgs.length; i++) { LOG.info("Processing " + msgs[i] + " from " + serverInfo.getServerName()); + assert msgs[i].getType() == Type.MSG_REGION_CLOSE; HRegionInfo info = msgs[i].getRegionInfo(); // Meta/root region offlining is handed in removeServerInfo above. if (!info.isMetaRegion()) { @@ -356,6 +365,18 @@ class ServerManager implements HConstants { } } } + + // There should not be any regions in transition for this server - the + // server should finish transitions itself before closing + Map inTransition = + master.regionManager.getRegionsInTransitionOnServer( + serverInfo.getServerName()); + for (Map.Entry entry : inTransition.entrySet()) { + LOG.warn("Region server " + serverInfo.getServerName() + + " shut down with region " + entry.getKey() + " in transition " + + "state " + entry.getValue()); + master.regionManager.setUnassigned(entry.getValue().getRegionInfo(), true); + } } // We don't need to return anything to the server because it isn't // going to do any more work.