Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -807,10 +807,10 @@ break; } if (regionState == null || - (!regionState.isPendingOpen() && !regionState.isOpening())) { + (!regionState.isOffline() && !regionState.isPendingOpen() && !regionState.isOpening())) { LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName + " from server " + sn + " but region was in " + - " the state " + regionState + " and not in PENDING_OPEN or OPENING"); + " the state " + regionState + " and not in OFFLINE, PENDING_OPEN or OPENING"); return; } // Handle this the same as if it were opened and then closed. @@ -830,15 +830,12 @@ failoverProcessedRegions.put(encodedName, hri); break; } - // Should see OPENING after we have asked it to OPEN or additional - // times after already being in state of OPENING if (regionState == null || - (!regionState.isPendingOpen() && !regionState.isOpening())) { - LOG.warn("Received OPENING for region " + - prettyPrintedRegionName + - " from server " + sn + " but region was in " + - " the state " + regionState + " and not " + - "in expected PENDING_OPEN or OPENING states"); + (!regionState.isOffline() && !regionState.isPendingOpen() && + !regionState.isOpening())) { + LOG.warn("Received OPENING for region " + prettyPrintedRegionName + " from server " + + sn + " but region was in " + " the state " + regionState + " and not " + + "in expected OFFLINE, PENDING_OPEN or OPENING states"); return; } // Transition to OPENING (or update stamp if already OPENING) @@ -856,7 +853,7 @@ } // Should see OPENED after OPENING but possible after PENDING_OPEN if (regionState == null || - (!regionState.isPendingOpen() && !regionState.isOpening())) { + (!regionState.isOffline() && !regionState.isPendingOpen() && !regionState.isOpening())) { LOG.warn("Received OPENED for region " + prettyPrintedRegionName + " from server " + sn + " but region was in " + @@ -1431,7 +1428,7 @@ @Override public void processResult(int rc, String path, Object ctx, String name) { if (rc != 0) { - // Thisis resultcode. If non-zero, need to resubmit. + // This is resultcode. If non-zero, need to resubmit. LOG.warn("rc != 0 for " + path + " -- retryable connectionloss -- " + "FIX see http://wiki.apache.org/hadoop/ZooKeeper/FAQ#A2"); this.zkw.abort("Connectionloss writing unassigned at " + path + @@ -1578,15 +1575,18 @@ try { LOG.info("Assigning region " + state.getRegion().getRegionNameAsString() + " to " + plan.getDestination().toString()); - // Transition RegionState to PENDING_OPEN - regionStates.updateRegionState(state.getRegion(), - RegionState.State.PENDING_OPEN, System.currentTimeMillis(), - plan.getDestination()); - // Send OPEN RPC. This can fail if the server on other end is is not up. - // Pass the version that was obtained while setting the node to OFFLINE. - RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan - .getDestination(), state.getRegion(), versionOfOfflineNode); - if (regionOpenState == RegionOpeningState.ALREADY_OPENED) { + + RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan.getDestination(), + state.getRegion(), versionOfOfflineNode); + if (regionOpenState == RegionOpeningState.OPENED) { + // Transition RegionState to PENDING_OPEN + if (state.isOffline() && !state.isOpening()) { + regionStates.updateRegionState(state.getRegion(), RegionState.State.PENDING_OPEN, + System.currentTimeMillis(), plan.getDestination()); + } + if (state.isOpening()) return; + if (state.isOpened()) return; + } else if (regionOpenState == RegionOpeningState.ALREADY_OPENED) { processAlreadyOpenedRegion(state.getRegion(), plan.getDestination()); } else if (regionOpenState == RegionOpeningState.FAILED_OPENING) { // Failed opening this region @@ -2977,11 +2977,12 @@ } /** - * Process shutdown server removing any assignments. + * Start processing of shutdown server. * @param sn Server that went down. - * @return list of regions in transition on this server + * @return Pair that has all regionplans that pertain to this dead server and a list that has + * the overlap of regions in transition and regions that were on the server. */ - public List processServerShutdown(final ServerName sn) { + public Pair, List> processServerShutdown(final ServerName sn) { // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { for (Iterator > i = @@ -3065,4 +3066,5 @@ this.master.abort(errorMsg, e); } } + } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -43,6 +44,7 @@ import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.zookeeper.KeeperException; @@ -229,13 +231,6 @@ return; } - // Clean out anything in regions in transition. Being conservative and - // doing after log splitting. Could do some states before -- OPENING? - // OFFLINE? -- and then others after like CLOSING that depend on log - // splitting. - List regionsInTransition = - this.services.getAssignmentManager(). - processServerShutdown(this.serverName); // Wait on meta to come online; we need it to progress. // TODO: Best way to hold strictly here? We should build this retry logic @@ -267,98 +262,154 @@ serverName + ", retrying META read", ioe); } } + + // Returns set of regions that had regionplans against the downed server and a list of + // the intersection of regions-in-transition and regions that were on the server that died. + Pair, List> p = this.services.getAssignmentManager() + .processServerShutdown(this.serverName); + Set regionsFromRIT = p.getFirst(); + List ritsIntersection = p.getSecond(); - // Skip regions that were in transition unless CLOSING or PENDING_CLOSE - for (RegionState rit : regionsInTransition) { - if (!rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) { - LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() + - " from list of regions to assign because in RIT; region state: " + - rit.getState()); - if (hris != null) hris.remove(rit.getRegion()); + List regionsToAssign = getRegionsToAssign(hris, ritsIntersection, regionsFromRIT); + int reassignedRegions = 0; + // Get all available servers + for (HRegionInfo hri : regionsFromRIT) { + if (!this.services.getAssignmentManager().getRegionStates().isRegionAssigned(hri)) { + if (!regionsToAssign.contains(hri)) { + regionsToAssign.add(hri); + } } } + List availableServers = services.getServerManager() + .createDestinationServersList(); + this.services.getAssignmentManager().assign(regionsToAssign, availableServers); + LOG.info(reassignedRegions + " regions which were planned to open on " + this.serverName + + " have been re-assigned."); - assert regionsInTransition != null; - LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) + - " region(s) that " + (serverName == null? "null": serverName) + - " was carrying (skipping " + - regionsInTransition.size() + - " regions(s) that are already in transition)"); + } finally { + this.deadServers.finish(serverName); + } + LOG.info("Finished processing of shutdown of " + serverName); + } + /** + * Figure what to assign from the dead server considering state of RIT and whats up in .META. + * @param metaHRIs Regions that .META. says were assigned to the dead server + * @param deadServerRITs Regions that were in transition against the dead server + * @return List of regions to assign or null if aborting. + * @throws IOException + */ // Iterate regions that were on this server and assign them - if (hris != null) { - List toAssignRegions = new ArrayList(); - for (Map.Entry e: hris.entrySet()) { - RegionState rit = services.getAssignmentManager() - .getRegionStates().getRegionTransitionState(e.getKey()); - if (processDeadRegion(e.getKey(), e.getValue(), - this.services.getAssignmentManager(), - this.server.getCatalogTracker())) { - ServerName addressFromAM = this.services.getAssignmentManager() - .getRegionStates().getRegionServerOfRegion(e.getKey()); - if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) { - // Skip regions that were in transition unless CLOSING or - // PENDING_CLOSE - LOG.info("Skip assigning region " + rit.toString()); - } else if (addressFromAM != null - && !addressFromAM.equals(this.serverName)) { - LOG.debug("Skip assigning region " - + e.getKey().getRegionNameAsString() - + " because it has been opened in " - + addressFromAM.getServerName()); - } else { - if (rit != null) { - //clean zk node - try{ - LOG.info("Reassigning region with rs =" + rit + " and deleting zk node if exists"); - ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), e.getKey()); - }catch (KeeperException ke) { - this.server.abort("Unexpected ZK exception deleting unassigned node " + e.getKey(), ke); - return; - } - } - toAssignRegions.add(e.getKey()); - } - } else if (rit != null && (rit.isSplitting() || rit.isSplit())) { - // This will happen when the RS went down and the call back for the SPLIITING or SPLIT - // has not yet happened for node Deleted event. In that case if the region was actually - // split - // but the RS had gone down before completing the split process then will not try to - // assign the parent region again. In that case we should make the region offline and - // also delete the region from RIT. - HRegionInfo region = rit.getRegion(); - AssignmentManager am = this.services.getAssignmentManager(); - am.regionOffline(region); + private List getRegionsToAssign(final NavigableMap metaHRIs, + final List deadServerRITs, Set regionsFromRIT) throws IOException { + // Collect regions to bulk assign here. + List toAssign = new ArrayList(); + // If no regions on the server, then nothing to assign (Regions that were + // currently being + // assigned will be retried over in the AM#assign method). + if (metaHRIs == null || metaHRIs.isEmpty()) return toAssign; + // Remove regions that we do not want to reassign such as regions that are + // OFFLINE. If + // region is OFFLINE against this server, its probably being assigned over + // in the single + // region assign method in AM; do not assign it here too. TODO: VERIFY!!! + // TODO: Currently + // OFFLINE is too messy. Its done on single assign but bulk done when bulk + // assigning and then + // there is special handling when master joins a cluster. + // + // If split, the zk callback will have offlined. Daughters will be in the + // list of hris we got + // from scanning the .META. These should be reassigned. Not the parent. + for (RegionState rs : deadServerRITs) { + if (!rs.isClosing() && !rs.isPendingClose() && !rs.isSplitting()) { + LOG.debug("Removed " + rs.getRegion().getRegionNameAsString() + + " from list of regions to assign because region state: " + rs.getState()); + metaHRIs.remove(rs.getRegion()); + } + } + + for (Map.Entry e : metaHRIs.entrySet()) { + RegionState rit = services.getAssignmentManager().getRegionStates() + .getRegionTransitionState(e.getKey()); + AssignmentManager assignmentManager = this.services.getAssignmentManager(); + if (processDeadRegion(e.getKey(), e.getValue(), assignmentManager, + this.server.getCatalogTracker())) { + ServerName addressFromAM = assignmentManager.getRegionStates().getRegionServerOfRegion( + e.getKey()); + if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting() + && !regionsFromRIT.contains(e.getKey())) { + // Skip regions that were in transition unless CLOSING or + // PENDING_CLOSE + LOG.info("Skip assigning region " + rit.toString()); + } else if (addressFromAM != null && !addressFromAM.equals(this.serverName)) { + LOG.debug("Skip assigning region " + e.getKey().getRegionNameAsString() + + " because it has been opened in " + addressFromAM.getServerName()); + regionsFromRIT.remove(e.getKey()); + } else { + if (rit != null) { + // clean zk node + try { + LOG.info("Reassigning region with rs =" + rit + " and deleting zk node if exists"); + ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), e.getKey()); + } catch (KeeperException ke) { + this.server.abort("Unexpected ZK exception deleting unassigned node " + e.getKey(), + ke); + return null; + } } - // If the table was partially disabled and the RS went down, we should clear the RIT - // and remove the node for the region. - // The rit that we use may be stale in case the table was in DISABLING state - // but though we did assign we will not be clearing the znode in CLOSING state. - // Doing this will have no harm. See HBASE-5927 - if (rit != null - && (rit.isClosing() || rit.isPendingClose()) - && this.services.getAssignmentManager().getZKTable() - .isDisablingOrDisabledTable(rit.getRegion().getTableNameAsString())) { - HRegionInfo hri = rit.getRegion(); - AssignmentManager am = this.services.getAssignmentManager(); - am.deleteClosingOrClosedNode(hri); - am.regionOffline(hri); - // To avoid region assignment if table is in disabling or disabled state. - toAssignRegions.remove(hri); - } + toAssign.add(e.getKey()); } - // Get all available servers - List availableServers = services.getServerManager() - .createDestinationServersList(); - this.services.getAssignmentManager().assign(toAssignRegions, - availableServers); + } else if (rit != null && (rit.isSplitting() || rit.isSplit())) { + // This will happen when the RS went down and the call back for the + // SPLIITING or SPLIT + // has not yet happened for node Deleted event. In that case if the + // region was actually + // split + // but the RS had gone down before completing the split process then + // will not try to + // assign the parent region again. In that case we should make the + // region offline and + // also delete the region from RIT. + HRegionInfo region = rit.getRegion(); + AssignmentManager am = assignmentManager; + am.regionOffline(region); + regionsFromRIT.remove(region); } - } finally { - this.deadServers.finish(serverName); + // If the table was partially disabled and the RS went down, we should + // clear the RIT + // and remove the node for the region. + // The rit that we use may be stale in case the table was in DISABLING + // state + // but though we did assign we will not be clearing the znode in CLOSING + // state. + // Doing this will have no harm. See HBASE-5927 + toAssign = checkForDisablingOrDisabledTables(regionsFromRIT, toAssign, rit, assignmentManager); } - LOG.info("Finished processing of shutdown of " + serverName); + return toAssign; } + private List checkForDisablingOrDisabledTables(Set regionsFromRIT, + List toAssign, RegionState rit, AssignmentManager assignmentManager) { + if (rit == null) { + return toAssign; + } + if (!rit.isClosing() && !rit.isPendingClose()) { + return toAssign; + } + if (!assignmentManager.getZKTable().isDisablingOrDisabledTable( + rit.getRegion().getTableNameAsString())) { + return toAssign; + } + HRegionInfo hri = rit.getRegion(); + AssignmentManager am = assignmentManager; + am.deleteClosingOrClosedNode(hri); + am.regionOffline(hri); + // To avoid region assignment if table is in disabling or disabled state. + toAssign.remove(hri); + regionsFromRIT.remove(hri); + return toAssign; + } /** * Process a dead region from a dead RS. Checks if the region is disabled or * disabling or if the region has a partially completed split. Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionState.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionState.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionState.java (working copy) @@ -37,8 +37,18 @@ public class RegionState implements org.apache.hadoop.io.Writable { public enum State { OFFLINE, // region is in an offline state - PENDING_OPEN, // sent rpc to server to open but has not begun - OPENING, // server has begun to open but not yet done + // We have sent the open rpc to the region server. We hold this state until + // regionserver + // assumes control of the region by setting znode to OPENING from OFFLINE + // (This is an + // in-memory state only with no corresponding znode state + PENDING_OPEN, + + // Set by zk callback when regionserver moves znode from OFFLINE to OPENING. + // Regionserver now + // has control of the region. + OPENING, + OPEN, // server opened region and updated meta PENDING_CLOSE, // sent rpc to server to close but has not begun CLOSING, // server has begun to close but not yet done Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (working copy) @@ -25,6 +25,8 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentSkipListSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -325,32 +327,54 @@ /** * A server is offline, all regions on it are dead. */ - public synchronized List serverOffline(final ServerName sn) { + public synchronized Pair, List> serverOffline(final ServerName sn) { // Clean up this server from map of servers to regions, and remove all regions // of this server from online map of regions. + Set deadRegions = new TreeSet(); + List rits = new ArrayList(); Set assignedRegions = serverHoldings.get(sn); - if (assignedRegions == null || assignedRegions.isEmpty()) { - // No regions on this server, we are done, return empty list of RITs - return rits; + if (assignedRegions != null && !assignedRegions.isEmpty()) { + deadRegions.addAll(assignedRegions); + for (HRegionInfo region : deadRegions) { + this.regionAssignments.remove(region); + } } - for (HRegionInfo region : assignedRegions) { - regionAssignments.remove(region); - } - // See if any of the regions that were online on this server were in RIT // If they are, normal timeouts will deal with them appropriately so // let's skip a manual re-assignment. + Set regionsToAssign = new ConcurrentSkipListSet(); for (RegionState state : regionsInTransition.values()) { - if (assignedRegions.contains(state.getRegion())) { + if (deadRegions.contains(state.getRegion())) { rits.add(state); } + // If destination server in RegionState is same as dead server then add to + // regions to assign + // Skip the region in OFFLINE state because destionation server in + // RegionState is master + // server name. Skip the region if the destionation server in RegionState + // is other than dead + // server. + if (state.getServerName().equals(sn)) { + regionsToAssign.add(state.getRegion()); + } } - assignedRegions.clear(); + if (assignedRegions != null) { + assignedRegions.clear(); + } this.notifyAll(); - return rits; + return new Pair, List>(regionsToAssign, rits); } + + /** + * Returns the regions assigned to the given server. + * @param sn + * @return + */ + public synchronized Set getRegionsInAServer(final ServerName sn) { + return this.serverHoldings.get(sn); + } /** * Gets the online regions of the specified table. Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java (working copy) @@ -168,8 +168,7 @@ // Done! Region is closed on this RS LOG.debug("Closed region " + region.getRegionNameAsString()); } finally { - this.rsServices.getRegionsInTransitionInRS(). - remove(this.regionInfo.getEncodedNameAsBytes()); + this.rsServices.removeFromRegionsInTransition(this.regionInfo); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (working copy) @@ -54,9 +54,8 @@ // the total open. We'll fail the open if someone hijacks our znode; we can // tell this has happened if version is not as expected. private volatile int version = -1; - //version of the offline node that was set by the master - private volatile int versionOfOfflineNode = -1; + public OpenRegionHandler(final Server server, final RegionServerServices rsServices, HRegionInfo regionInfo, HTableDescriptor htd) { @@ -64,20 +63,20 @@ } public OpenRegionHandler(final Server server, final RegionServerServices rsServices, HRegionInfo regionInfo, - HTableDescriptor htd, int versionOfOfflineNode) { + HTableDescriptor htd, int version) { this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, - versionOfOfflineNode); + version); } protected OpenRegionHandler(final Server server, final RegionServerServices rsServices, final HRegionInfo regionInfo, final HTableDescriptor htd, EventType eventType, - final int versionOfOfflineNode) { + final int version) { super(server, eventType); this.rsServices = rsServices; this.regionInfo = regionInfo; this.htd = htd; - this.versionOfOfflineNode = versionOfOfflineNode; + this.version = version; } public HRegionInfo getRegionInfo() { @@ -96,15 +95,6 @@ // Check that this region is not already online HRegion region = this.rsServices.getFromOnlineRegions(encodedName); - // If fails, just return. Someone stole the region from under us. - // Calling transitionZookeeperOfflineToOpening initalizes this.version. - if (!transitionZookeeperOfflineToOpening(encodedName, - versionOfOfflineNode)) { - LOG.warn("Region was hijacked? It no longer exists, encodedName=" + - encodedName); - return; - } - // Open region. After a successful open, failures in subsequent // processing needs to do a close as part of cleanup. region = openRegion(); @@ -142,8 +132,7 @@ LOG.debug("Opened " + name + " on server:" + this.server.getServerName()); } finally { - this.rsServices.getRegionsInTransitionInRS(). - remove(this.regionInfo.getEncodedNameAsBytes()); + this.rsServices.removeFromRegionsInTransition(this.regionInfo); } } @@ -365,33 +354,6 @@ if (region != null) region.close(); } - /** - * Transition ZK node from OFFLINE to OPENING. - * @param encodedName Name of the znode file (Region encodedName is the znode - * name). - * @param versionOfOfflineNode - version Of OfflineNode that needs to be compared - * before changing the node's state from OFFLINE - * @return True if successful transition. - */ - boolean transitionZookeeperOfflineToOpening(final String encodedName, - int versionOfOfflineNode) { - // TODO: should also handle transition from CLOSED? - try { - // Initialize the znode version. - this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo, - server.getServerName(), EventType.M_ZK_REGION_OFFLINE, - EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode); - } catch (KeeperException e) { - LOG.error("Error transition from OFFLINE to OPENING for region=" + - encodedName, e); - } - boolean b = isGoodVersion(); - if (!b) { - LOG.warn("Failed transition from OFFLINE to OPENING for region=" + - encodedName); - } - return b; - } /** * Update our OPENING state in zookeeper. Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -99,6 +99,7 @@ import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; @@ -199,6 +200,7 @@ import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; import org.apache.hadoop.hbase.zookeeper.RootRegionTracker; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -2179,9 +2181,14 @@ return this.zooKeeper; } + @Override + public boolean removeFromRegionsInTransition(final HRegionInfo hri) { + return this.regionsInTransitionInRS.remove(hri.getEncodedNameAsBytes()); + } - public ConcurrentSkipListMap getRegionsInTransitionInRS() { - return this.regionsInTransitionInRS; + @Override + public boolean containsKeyInRegionsInTransition(final HRegionInfo hri) { + return this.regionsInTransitionInRS.containsKey(hri.getEncodedNameAsBytes()); } public ExecutorService getExecutorService() { @@ -2447,16 +2454,35 @@ } } - protected void checkIfRegionInTransition(HRegionInfo region, - String currentAction) throws RegionAlreadyInTransitionException { - byte[] encodedName = region.getEncodedNameAsBytes(); - if (this.regionsInTransitionInRS.containsKey(encodedName)) { - boolean openAction = this.regionsInTransitionInRS.get(encodedName); + + /** + * String currentAction) throws RegionAlreadyInTransitionException { Add + * region to this regionservers list of in transitions regions ONLY if its not + * already byte[] encodedName = region.getEncodedNameAsBytes(); in transition. + * If a region already in RIT, we throw + * {@link RegionAlreadyInTransitionException}. if + * (this.regionsInTransitionInRS.containsKey(encodedName)) { Callers need to + * call {@link #removeFromRegionsInTransition(HRegionInfo)} when done or if + * boolean openAction = this.regionsInTransitionInRS.get(encodedName); error + * processing. + * + * @param region + * Region to add + * @param currentAction + * Whether OPEN or CLOSE. + * @throws RegionAlreadyInTransitionException + */ + protected void addRegionsInTransition(final HRegionInfo region, final String currentAction) + throws RegionAlreadyInTransitionException { + Boolean action = this.regionsInTransitionInRS.putIfAbsent(region.getEncodedNameAsBytes(), + currentAction.equals(OPEN)); + if (action != null) { // The below exception message will be used in master. - throw new RegionAlreadyInTransitionException("Received:" + currentAction + - " for the region:" + region.getRegionNameAsString() + - " ,which we are already trying to " + - (openAction ? OPEN : CLOSE)+ "."); + // The below exception message will be used in master. + throw new RegionAlreadyInTransitionException("Received:" + currentAction + " for the region:" + + region.getRegionNameAsString() + " for the region:" + region.getRegionNameAsString() + + ", which we are already trying to " + " ,which we are already trying to " + + (action ? OPEN : CLOSE) + "."); } } @@ -2486,23 +2512,29 @@ */ protected boolean closeRegion(HRegionInfo region, final boolean abort, final boolean zk, final int versionOfClosingNode, ServerName sn) { - if (this.regionsInTransitionInRS.containsKey(region.getEncodedNameAsBytes())) { - LOG.warn("Received close for region we are already opening or closing; " + - region.getEncodedName()); + try { + addRegionsInTransition(region, CLOSE); + } catch (RegionAlreadyInTransitionException rate) { + LOG.warn("Received close for region we are already opening or closing; " + + region.getEncodedName()); return false; } - this.regionsInTransitionInRS.putIfAbsent(region.getEncodedNameAsBytes(), false); - CloseRegionHandler crh = null; - if (region.isRootRegion()) { - crh = new CloseRootHandler(this, this, region, abort, zk, - versionOfClosingNode); - } else if (region.isMetaRegion()) { - crh = new CloseMetaHandler(this, this, region, abort, zk, - versionOfClosingNode); - } else { - crh = new CloseRegionHandler(this, this, region, abort, zk, versionOfClosingNode, sn); + boolean success = false; + try { + CloseRegionHandler crh = null; + if (region.isRootRegion()) { + crh = new CloseRootHandler(this, this, region, abort, zk, versionOfClosingNode); + } else if (region.isMetaRegion()) { + crh = new CloseMetaHandler(this, this, region, abort, zk, versionOfClosingNode); + } else { + crh = new CloseRegionHandler(this, this, region, abort, zk, versionOfClosingNode, sn); + } + this.service.submit(crh); + success = true; + } finally { + // Remove from this server's RIT. + if (!success) removeFromRegionsInTransition(region); } - this.service.submit(crh); return true; } @@ -3405,7 +3437,10 @@ for (RegionInfo regionInfo : request.getRegionList()) { HRegionInfo region = HRegionInfo.convert(regionInfo); try { - checkIfRegionInTransition(region, OPEN); + // Added to in-memory RS RIT that we are trying to open this region. + // Clear it if we fail + // queuing an open executor. + addRegionsInTransition(region, OPEN); HRegion onlineRegion = getFromOnlineRegions(region.getEncodedName()); if (null != onlineRegion) { // See HBASE-5094. Cross check with META if still this RS is owning @@ -3430,21 +3465,24 @@ htd = this.tableDescriptors.get(region.getTableName()); htds.put(region.getTableNameAsString(), htd); } - this.regionsInTransitionInRS.putIfAbsent( - region.getEncodedNameAsBytes(), true); + // Mark the region as OPENING up in zk. This is how we tell the master control of the + // region has passed to this regionserver. + int version = transitionZookeeperOfflineToOpening(region, versionOfOfflineNode); // Need to pass the expected version in the constructor. if (region.isRootRegion()) { this.service.submit(new OpenRootHandler(this, this, region, htd, - versionOfOfflineNode)); + version)); } else if (region.isMetaRegion()) { this.service.submit(new OpenMetaHandler(this, this, region, htd, - versionOfOfflineNode)); + version)); } else { this.service.submit(new OpenRegionHandler(this, this, region, htd, - versionOfOfflineNode)); + version)); } builder.addOpeningState(RegionOpeningState.OPENED); } catch (RegionAlreadyInTransitionException rie) { + // Clear from this server's RIT list else will stick around for ever. + removeFromRegionsInTransition(region); LOG.warn("Region is already in transition", rie); if (isBulkAssign) { builder.addOpeningState(RegionOpeningState.OPENED); @@ -3453,6 +3491,8 @@ } } catch (IOException ie) { LOG.warn("Failed opening region " + region.getRegionNameAsString(), ie); + // Clear from this server's RIT list else will stick around for ever. + removeFromRegionsInTransition(region); if (isBulkAssign) { builder.addOpeningState(RegionOpeningState.FAILED_OPENING); } else { @@ -3464,7 +3504,40 @@ } + /** + * Transition ZK node from OFFLINE to OPENING. The master will get a callback + * and will know that the region is now ours. + * + * @param hri + * HRegionInfo whose znode we are updating + * @param versionOfOfflineNode + * Version Of OfflineNode that needs to be compared before changing + * the node's state from OFFLINE + * @throws IOException + */ + int transitionZookeeperOfflineToOpening(final HRegionInfo hri, int versionOfOfflineNode) + throws IOException { + // TODO: should also handle transition from CLOSED? + int version = -1; + try { + // Initialize the znode version. + version = ZKAssign.transitionNode(this.zooKeeper, hri, this.getServerName(), + EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode); + } catch (KeeperException e) { + LOG.error("Error transition from OFFLINE to OPENING for region=" + hri.getEncodedName(), e); + } + if (version == -1) { + // TODO: Fix this sloppyness. The exception should be coming off zk + // directly, not an + // intepretation at this high-level (-1 when we call transitionNode can + // mean many things). + throw new IOException("Failed transition from OFFLINE to OPENING for region=" + + hri.getEncodedName()); + } + return version; + } + /** * Close a region on the region server. * * @param controller the RPC controller @@ -3493,7 +3566,6 @@ ". Version of ZK closing node:" + versionOfClosingNode + ". Destination server:" + sn); HRegionInfo regionInfo = region.getRegionInfo(); - checkIfRegionInTransition(regionInfo, CLOSE); boolean closed = closeRegion( regionInfo, false, zk, versionOfClosingNode, sn); builder.setClosed(closed); @@ -3935,4 +4007,5 @@ private String getMyEphemeralNodePath() { return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString()); } + } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java (revision 1371332) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerServices.java (working copy) @@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.regionserver.wal.HLog; @@ -77,10 +78,18 @@ public RpcServer getRpcServer(); /** - * Get the regions that are currently being opened or closed in the RS - * @return map of regions in transition in this RS + * Remove passed hri from the internal list of regions in transition on this + * regionserver. + * @param hri Region to remove. + * @return True if removed */ - public Map getRegionsInTransitionInRS(); + public boolean removeFromRegionsInTransition(HRegionInfo hri); + /** + * @param hri + * @return True if the internal list of regions in transition includes the + * passed hri. + */ + public boolean containsKeyInRegionsInTransition(HRegionInfo hri); /** * @return Return the FileSystem object used by the regionserver Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (working copy) @@ -169,4 +169,18 @@ RegionTransition rt = RegionTransition.parseFrom(existingBytes); return rt.getEventType().equals(expectedState); } + + static void waitForRegionOfflineInRIT(AssignmentManager am, String encodedName) + throws InterruptedException { + boolean wait = true; + while (wait) { + RegionState state = am.getRegionStates().getRegionsInTransition().get(encodedName); + if (state != null && state.isOffline()) { + wait = false; + } else { + Thread.sleep(1); + } + } + + } } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java (working copy) @@ -324,11 +324,6 @@ return null; } - @Override - public Map getRegionsInTransitionInRS() { - // TODO Auto-generated method stub - return null; - } @Override public FileSystem getFileSystem() { @@ -515,4 +510,16 @@ // TODO Auto-generated method stub return null; } + + @Override + public boolean removeFromRegionsInTransition(HRegionInfo hri) { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean containsKeyInRegionsInTransition(HRegionInfo hri) { + // TODO Auto-generated method stub + return false; + } } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (working copy) @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.RegionException; import org.apache.hadoop.hbase.RegionTransition; @@ -196,11 +197,17 @@ int versionid = ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); assertNotSame(versionid, -1); - Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); + Mocking.waitForRegionOfflineInRIT(am, REGIONINFO.getEncodedName()); - // Get current versionid else will fail on transition from OFFLINE to + // Get the OFFLINE version id. May have to wait some for it to happen. // OPENING below - versionid = ZKAssign.getVersion(this.watcher, REGIONINFO); + while (true) { + int vid = ZKAssign.getVersion(this.watcher, REGIONINFO); + if (vid != versionid) { + versionid = vid; + break; + } + } assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, @@ -238,11 +245,17 @@ ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); assertNotSame(versionid, -1); am.gate.set(false); - Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); + Mocking.waitForRegionOfflineInRIT(am, REGIONINFO.getEncodedName()); // Get current versionid else will fail on transition from OFFLINE to // OPENING below - versionid = ZKAssign.getVersion(this.watcher, REGIONINFO); + while (true) { + int vid = ZKAssign.getVersion(this.watcher, REGIONINFO); + if (vid != versionid) { + versionid = vid; + break; + } + } assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, @@ -279,12 +292,18 @@ int versionid = ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); assertNotSame(versionid, -1); - Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); + Mocking.waitForRegionOfflineInRIT(am, REGIONINFO.getEncodedName()); am.gate.set(false); // Get current versionid else will fail on transition from OFFLINE to // OPENING below - versionid = ZKAssign.getVersion(this.watcher, REGIONINFO); + while (true) { + int vid = ZKAssign.getVersion(this.watcher, REGIONINFO); + if (vid != versionid) { + versionid = vid; + break; + } + } assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, @@ -360,10 +379,16 @@ // balancer. The zk node will be OFFLINE waiting for regionserver to // transition it through OPENING, OPENED. Wait till we see the OFFLINE // zk node before we proceed. - Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); + Mocking.waitForRegionOfflineInRIT(am, REGIONINFO.getEncodedName()); // Get current versionid else will fail on transition from OFFLINE to OPENING below - versionid = ZKAssign.getVersion(this.watcher, REGIONINFO); + while (true) { + int vid = ZKAssign.getVersion(this.watcher, REGIONINFO); + if (vid != versionid) { + versionid = vid; + break; + } + } assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, @@ -402,9 +427,9 @@ .getConfiguration()); // Create an AM. AssignmentManager am = new AssignmentManager(this.server, - this.serverManager, ct, balancer, executor, null); + this.serverManager, ct, balancer, executor, null); try { - processServerShutdownHandler(ct, am, false); + processServerShutdownHandler(ct, am, false, null); } finally { executor.shutdown(); am.shutdown(); @@ -469,7 +494,7 @@ ZKUtil.createAndWatch(this.watcher, node, data.toByteArray()); try { - processServerShutdownHandler(ct, am, regionSplitDone); + processServerShutdownHandler(ct, am, regionSplitDone, null); // check znode deleted or not. // In both cases the znode should be deleted. @@ -506,7 +531,7 @@ // adding region to regions and servers maps. am.regionOnline(REGIONINFO, SERVERNAME_A); // adding region in pending close. - am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE); + am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE, System.currentTimeMillis(), SERVERNAME_A); if (state == Table.State.DISABLING) { am.getZKTable().setDisablingTable(REGIONINFO.getTableNameAsString()); } else { @@ -521,7 +546,7 @@ // create znode in M_ZK_REGION_CLOSING state. ZKUtil.createAndWatch(this.watcher, node, data.toByteArray()); try { - processServerShutdownHandler(ct, am, false); + processServerShutdownHandler(ct, am, false, null); // check znode deleted or not. // In both cases the znode should be deleted. assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1); @@ -540,8 +565,59 @@ ZKAssign.deleteAllNodes(this.watcher); } } + + /** + * When region in transition if region server opening the region gone down then region assignment + * taking long time(Waiting for timeout monitor to trigger assign). HBASE-5396(HBASE-6060) fixes this + * scenario. This test case verifies whether SSH calling assign for the region in transition or not. + * + * @throws KeeperException + * @throws IOException + * @throws ServiceException + */ + @Test + public void testSSHWhenSourceRSandDestRSInRegionPlanGoneDown() throws KeeperException, IOException, + ServiceException { + testSSHWhenSourceRSandDestRSInRegionPlanGoneDown(true); + testSSHWhenSourceRSandDestRSInRegionPlanGoneDown(false); + } + + private void testSSHWhenSourceRSandDestRSInRegionPlanGoneDown(boolean regionInOffline) + throws IOException, KeeperException, ServiceException { + // We need a mocked catalog tracker. + CatalogTracker ct = Mockito.mock(CatalogTracker.class); + // Create an AM. + AssignmentManagerWithExtrasForTesting am = + setUpMockedAssignmentManager(this.server, this.serverManager); + // adding region in pending open. + if (regionInOffline) { + ServerName MASTER_SERVERNAME = new ServerName("example.org", 1111, 1111); + am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO, + State.OFFLINE, System.currentTimeMillis(), MASTER_SERVERNAME)); + } else { + am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO, + State.OPENING, System.currentTimeMillis(), SERVERNAME_B)); + } + // adding region plan + am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B)); + am.getZKTable().setEnabledTable(REGIONINFO.getTableNameAsString()); + + try { + processServerShutdownHandler(ct, am, false, SERVERNAME_A); + processServerShutdownHandler(ct, am, false, SERVERNAME_B); + if(regionInOffline){ + assertFalse("Assign should not be invoked.", am.assignInvoked); + } else { + assertTrue("Assign should be invoked.", am.assignInvoked); + } + } finally { + am.getRegionStates().regionsInTransition.remove(REGIONINFO.getEncodedName()); + am.regionPlans.remove(REGIONINFO.getEncodedName()); + } + } - private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion) + private void processServerShutdownHandler(CatalogTracker ct, + AssignmentManager am, boolean splitRegion, ServerName sn) throws IOException, ServiceException { // Make sure our new AM gets callbacks; once registered, can't unregister. // Thats ok because we make a new zk watcher for each test. @@ -553,10 +629,19 @@ // Get a meta row result that has region up on SERVERNAME_A Result r = null; - if (splitRegion) { - r = Mocking.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A); + if (sn == null) { + if (splitRegion) { + r = Mocking + .getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A); + } else { + r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + } } else { - r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + if (sn.equals(SERVERNAME_A)) { + r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + } else if (sn.equals(SERVERNAME_B)) { + r = new Result(new KeyValue[0]); + } } ScanResponse.Builder builder = ScanResponse.newBuilder(); @@ -585,8 +670,12 @@ Mockito.when(services.getAssignmentManager()).thenReturn(am); Mockito.when(services.getServerManager()).thenReturn(this.serverManager); Mockito.when(services.getZooKeeper()).thenReturn(this.watcher); - ServerShutdownHandler handler = new ServerShutdownHandler(this.server, - services, deadServers, SERVERNAME_A, false); + ServerShutdownHandler handler = null; + if (sn != null) { + handler = new ServerShutdownHandler(this.server, services, deadServers, sn, false); + } else { + handler = new ServerShutdownHandler(this.server, services, deadServers, SERVERNAME_A, false); + } handler.process(); // The region in r will have been assigned. It'll be up in zk as unassigned. } @@ -738,7 +827,7 @@ assertTrue("Destination servers should be different.", !(regionPlan .getDestination().equals(newRegionPlan.getDestination()))); - Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); + Mocking.waitForRegionOfflineInRIT(am, REGIONINFO.getEncodedName()); } finally { this.server.getConfiguration().setClass( HConstants.HBASE_MASTER_LOADBALANCER_CLASS, DefaultLoadBalancer.class, @@ -992,7 +1081,11 @@ @Override public void assign(java.util.List regions, java.util.List servers) { - assignInvoked = true; + if (!regions.isEmpty()) { + assignInvoked = true; + } else { + assignInvoked = false; + } }; /** reset the watcher */ Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKBasedOpenCloseRegion.java (working copy) @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.master.handler.TotesHRegionInfo; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Writables; @@ -70,6 +71,8 @@ @BeforeClass public static void beforeAllTests() throws Exception { Configuration c = TEST_UTIL.getConfiguration(); + c.setClass(HConstants.REGION_SERVER_IMPL, TestZKBasedOpenCloseRegionRegionServer.class, + HRegionServer.class); c.setBoolean("dfs.support.append", true); c.setInt("hbase.regionserver.info.port", 0); TEST_UTIL.startMiniCluster(2); @@ -94,6 +97,22 @@ } waitUntilAllRegionsAssigned(); } + + /** + * Special HRegionServer used in these tests that allows access to + * {@link #addRegionsInTransition(HRegionInfo, String)}. + */ + public static class TestZKBasedOpenCloseRegionRegionServer extends HRegionServer { + public TestZKBasedOpenCloseRegionRegionServer(Configuration conf) + throws IOException, InterruptedException { + super(conf); + } + @Override + public void addRegionsInTransition(HRegionInfo region, + String currentAction) throws RegionAlreadyInTransitionException { + super.addRegionsInTransition(region, currentAction); + } + } /** * Test we reopen a region once closed. @@ -245,9 +264,13 @@ cluster.getLiveRegionServerThreads().get(1).getRegionServer(); HRegionInfo hri = getNonMetaRegion(ProtobufUtil.getOnlineRegions(hr0)); - // fake that hr1 is processing the region - hr1.getRegionsInTransitionInRS().putIfAbsent(hri.getEncodedNameAsBytes(), true); + // Fake that hr1 is processing the region. At top of this test we made a + // regionserver that + // gave access addRegionsInTransition. Need to cast as + // TestZKBasedOpenCloseRegionRegionServer. + ((TestZKBasedOpenCloseRegionRegionServer) hr1).addRegionsInTransition(hri, "OPEN"); + AtomicBoolean reopenEventProcessed = new AtomicBoolean(false); EventHandlerListener openListener = new ReopenEventListener(hri.getRegionNameAsString(), @@ -263,7 +286,7 @@ assertEquals(hr1.getOnlineRegion(hri.getEncodedNameAsBytes()), null); // remove the block and reset the boolean - hr1.getRegionsInTransitionInRS().remove(hri.getEncodedNameAsBytes()); + hr1.removeFromRegionsInTransition(hri); reopenEventProcessed.set(false); // now try moving a region when there is no region in transition. @@ -334,7 +357,7 @@ } Whitebox.setInternalState(regionServer, "tableDescriptors", orizinalState); assertFalse("Region should not be in RIT", - regionServer.getRegionsInTransitionInRS().containsKey(REGIONINFO.getEncodedNameAsBytes())); + regionServer.containsKeyInRegionsInTransition(REGIONINFO)); } private static void waitUntilAllRegionsAssigned() Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestCloseRegionHandler.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestCloseRegionHandler.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestCloseRegionHandler.java (working copy) @@ -209,7 +209,8 @@ throws IOException, NodeExistsException, KeeperException, DeserializationException { // Create it OFFLINE node, which is what Master set before sending OPEN RPC ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName()); - OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, htd); + int version = ZKAssign.transitionNodeOpening(server.getZooKeeper(), hri, server.getServerName()); + OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, htd, version); openHandler.process(); // This parse is not used? RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName())); Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (working copy) @@ -134,6 +134,7 @@ // Create it OFFLINE, which is what it expects ZKAssign.createNodeOffline(server.getZooKeeper(), TEST_HRI, server.getServerName()); + ZKAssign.transitionNodeOpening(server.getZooKeeper(), TEST_HRI, server.getServerName()); // Create the handler OpenRegionHandler handler = @@ -159,7 +160,7 @@ // Create it OFFLINE, which is what it expects ZKAssign.createNodeOffline(server.getZooKeeper(), TEST_HRI, server.getServerName()); - + ZKAssign.transitionNodeOpening(server.getZooKeeper(), TEST_HRI, server.getServerName()); // Create the handler OpenRegionHandler handler = new OpenRegionHandler(server, rsServices, TEST_HRI, TEST_HTD) { Index: hbase-server/src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (revision 1371332) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (working copy) @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.fs.HFileSystem; @@ -91,11 +92,6 @@ } @Override - public ConcurrentSkipListMap getRegionsInTransitionInRS() { - return rit; - } - - @Override public FlushRequester getFlushRequester() { return null; } @@ -162,4 +158,16 @@ public Leases getLeases() { return null; } + + @Override + public boolean removeFromRegionsInTransition(HRegionInfo hri) { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean containsKeyInRegionsInTransition(HRegionInfo hri) { + // TODO Auto-generated method stub + return false; + } }