Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1349115) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -878,10 +878,10 @@ break; } if (regionState == null || - (!regionState.isPendingOpen() && !regionState.isOpening())) { + (!regionState.isOffline() && !regionState.isPendingOpen() && !regionState.isOpening())) { LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName + " from server " + sn + " but region was in " + - " the state " + regionState + " and not in PENDING_OPEN or OPENING"); + " the state " + regionState + " and not in OFFLINE, PENDING_OPEN or OPENING"); return; } // Handle this the same as if it were opened and then closed. @@ -901,15 +901,12 @@ failoverProcessedRegions.put(encodedName, hri); break; } - // Should see OPENING after we have asked it to OPEN or additional - // times after already being in state of OPENING if (regionState == null || - (!regionState.isPendingOpen() && !regionState.isOpening())) { - LOG.warn("Received OPENING for region " + - prettyPrintedRegionName + - " from server " + sn + " but region was in " + - " the state " + regionState + " and not " + - "in expected PENDING_OPEN or OPENING states"); + (!regionState.isOffline() && !regionState.isPendingOpen() && + !regionState.isOpening())) { + LOG.warn("Received OPENING for region " + prettyPrintedRegionName + " from server " + + sn + " but region was in " + " the state " + regionState + " and not " + + "in expected OFFLINE, PENDING_OPEN or OPENING states"); return; } // Transition to OPENING (or update stamp if already OPENING) @@ -1568,7 +1565,7 @@ @Override public void processResult(int rc, String path, Object ctx, String name) { if (rc != 0) { - // Thisis resultcode. If non-zero, need to resubmit. 
+ // This is resultcode. If non-zero, need to resubmit. LOG.warn("rc != 0 for " + path + " -- retryable connectionloss -- " + "FIX see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A2"); this.zkw.abort("Connectionloss writing unassigned at " + path + @@ -1715,14 +1712,13 @@ try { LOG.debug("Assigning region " + state.getRegion().getRegionNameAsString() + " to " + plan.getDestination().toString()); - // Transition RegionState to PENDING_OPEN - state.update(RegionState.State.PENDING_OPEN, System.currentTimeMillis(), + RegionOpeningState regionOpenState = this.serverManager.sendRegionOpen(plan.getDestination(), + state.getRegion(), versionOfOfflineNode); + if (regionOpenState == RegionOpeningState.OPENED) { + // Transition RegionState to PENDING_OPEN + state.update(RegionState.State.PENDING_OPEN, System.currentTimeMillis(), plan.getDestination()); - // Send OPEN RPC. This can fail if the server on other end is is not up. - // Pass the version that was obtained while setting the node to OFFLINE. - RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan - .getDestination(), state.getRegion(), versionOfOfflineNode); - if (regionOpenState == RegionOpeningState.ALREADY_OPENED) { + } else if (regionOpenState == RegionOpeningState.ALREADY_OPENED) { // Remove region from in-memory transition and unassigned node from ZK // While trying to enable the table the regions of the table were // already enabled. @@ -3197,11 +3193,12 @@ } /** - * Process shutdown server removing any assignments. + * Start processing of shutdown server. * @param sn Server that went down. - * @return list of regions in transition on this server + * @return Pair that has all regionplans that pertain to this dead server and a list that has + * the overlap of regions in transition and regions that were on the server. 
*/ - public List processServerShutdown(final ServerName sn) { + public Pair, List> processServerShutdown(final ServerName sn) { // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { for (Iterator > i = @@ -3222,26 +3219,32 @@ List rits = new ArrayList(); synchronized (this.regions) { Set assignedRegions = this.servers.remove(sn); - if (assignedRegions == null || assignedRegions.isEmpty()) { - // No regions on this server, we are done, return empty list of RITs - return rits; + if (assignedRegions != null && !assignedRegions.isEmpty()) { + deadRegions = new TreeSet(assignedRegions); + for (HRegionInfo region : deadRegions) { + this.regions.remove(region); + } } - deadRegions = new TreeSet(assignedRegions); - for (HRegionInfo region : deadRegions) { - this.regions.remove(region); - } } // See if any of the regions that were online on this server were in RIT // If they are, normal timeouts will deal with them appropriately so // let's skip a manual re-assignment. // no lock concurrent access ok: we will be working on a copy, and it's java-valid to do // a copy while another thread is adding/removing items + Set regionsToAssign = new ConcurrentSkipListSet(); for (RegionState region : this.regionsInTransition.copyValues()) { if (deadRegions.remove(region.getRegion())) { rits.add(region); } + // If destination server in RegionState is same as dead server then add to regions to assign + // Skip the region in OFFLINE state because destination server in RegionState is master + // server name. Skip the region if the destination server in RegionState is other than dead + // server. 
+ if(region.getServerName().equals(sn)){ + regionsToAssign.add(region.getRegion()); + } } - return rits; + return new Pair, List>(regionsToAssign, rits); } /** @@ -3389,8 +3392,16 @@ public enum State { OFFLINE, // region is in an offline state - PENDING_OPEN, // sent rpc to server to open but has not begun - OPENING, // server has begun to open but not yet done + + // We have sent the open rpc to the region server. We hold this state until regionserver + // assumes control of the region by setting znode to OPENING from OFFLINE (This is an + // in-memory state only with no corresponding znode state) + PENDING_OPEN, + + // Set by zk callback when regionserver moves znode from OFFLINE to OPENING. Regionserver now + // has control of the region. + OPENING, + OPEN, // server opened region and updated meta PENDING_CLOSE, // sent rpc to server to close but has not begun CLOSING, // server has begun to close but not yet done @@ -3656,4 +3667,29 @@ this.master.abort(errorMsg, e); } } + + /** + * Checks whether the given region is currently online, cleaning up any stale assignment. + * @param hri region to check + * @return whether region is online or not. + */ + public boolean isRegionOnline(HRegionInfo hri) { + ServerName sn = null; + synchronized (this.regions) { + sn = this.regions.get(hri); + if (sn == null) { + return false; + } + if (this.isServerOnline(sn)) { + return true; + } + // Remove the assignment mapping for sn. 
+ Set hriSet = this.servers.get(sn); + if (hriSet != null) { + hriSet.remove(hri); + } + this.regions.remove(hri); + return false; + } + } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1349115) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -43,6 +44,7 @@ import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.apache.zookeeper.KeeperException; /** @@ -228,13 +230,12 @@ return; } - // Clean out anything in regions in transition. Being conservative and - // doing after log splitting. Could do some states before -- OPENING? - // OFFLINE? -- and then others after like CLOSING that depend on log - // splitting. - List regionsInTransition = - this.services.getAssignmentManager(). - processServerShutdown(this.serverName); + // Returns set of regions that had regionplans against the downed server and a list of + // the intersection of regions-in-transition and regions that were on the server that died. + Pair, List> p = this.services.getAssignmentManager() + .processServerShutdown(this.serverName); + Set regionsFromRIT = p.getFirst(); + List ritsIntersection = p.getSecond(); // Wait on meta to come online; we need it to progress. // TODO: Best way to hold strictly here? 
We should build this retry logic @@ -268,7 +269,7 @@ } // Skip regions that were in transition unless CLOSING or PENDING_CLOSE - for (RegionState rit : regionsInTransition) { + for (RegionState rit : ritsIntersection) { if (!rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) { LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() + " from list of regions to assign because in RIT; region state: " + @@ -277,16 +278,14 @@ } } - assert regionsInTransition != null; - LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) + - " region(s) that " + (serverName == null? "null": serverName) + - " was carrying (skipping " + - regionsInTransition.size() + - " regions(s) that are already in transition)"); + assert ritsIntersection != null; + LOG.info("Reassigning " + ((hris == null) ? 0 : hris.size()) + " region(s) that " + + (serverName == null ? "null" : serverName) + " was carrying (skipping " + + ritsIntersection.size() + " regions(s) that are already in transition)"); // Iterate regions that were on this server and assign them + List toAssignRegions = new ArrayList(); if (hris != null) { - List toAssignRegions = new ArrayList(); for (Map.Entry e: hris.entrySet()) { RegionState rit = this.services.getAssignmentManager().isRegionInTransition(e.getKey()); if (processDeadRegion(e.getKey(), e.getValue(), @@ -294,19 +293,18 @@ this.server.getCatalogTracker())) { ServerName addressFromAM = this.services.getAssignmentManager() .getRegionServerOfRegion(e.getKey()); - if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) { + if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting() + && !regionsFromRIT.contains(rit.getRegion())) { // Skip regions that were in transition unless CLOSING or // PENDING_CLOSE LOG.info("Skip assigning region " + rit.toString()); - } else if (addressFromAM != null - && !addressFromAM.equals(this.serverName)) { - LOG.debug("Skip assigning region " - + 
e.getKey().getRegionNameAsString() - + " because it has been opened in " - + addressFromAM.getServerName()); - } else { - toAssignRegions.add(e.getKey()); - } + } else if (addressFromAM != null && !addressFromAM.equals(this.serverName)) { + LOG.debug("Skip assigning region " + e.getKey().getRegionNameAsString() + + " because it has been opened in " + addressFromAM.getServerName()); + regionsFromRIT.remove(rit.getRegion()); + } else { + toAssignRegions.add(e.getKey()); + } } else if (rit != null && (rit.isSplitting() || rit.isSplit())) { // This will happen when the RS went down and the call back for the SPLIITING or SPLIT // has not yet happened for node Deleted event. In that case if the region was actually @@ -335,12 +333,22 @@ toAssignRegions.remove(hri); } } - // Get all available servers - List availableServers = services.getServerManager() - .createDestinationServersList(); - this.services.getAssignmentManager().assign(toAssignRegions, - availableServers); } + int reassignedRegions = 0; + // Get all available servers + for (HRegionInfo hri : regionsFromRIT) { + if (!this.services.getAssignmentManager().isRegionOnline(hri)) { + if (!toAssignRegions.contains(hri)) { + toAssignRegions.add(hri); + } + } + } + List availableServers = services.getServerManager() + .createDestinationServersList(); + this.services.getAssignmentManager().assign(toAssignRegions, availableServers); + LOG.info(reassignedRegions + " regions which were planned to open on " + this.serverName + + " have been re-assigned."); + } finally { this.deadServers.finish(serverName); }