Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1001312) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -1924,7 +1924,8 @@ // TODO: Need to check if this is being served here but currently undergoing // a split (so master needs to retry close after split is complete) if (!onlineRegions.containsKey(region.getEncodedName())) { - LOG.warn("Received close for region we are not serving"); + LOG.warn("Received close for region we are not serving; " + + region.getEncodedName()); throw new NotServingRegionException("Received close for " + region.getRegionNameAsString() + " but we are not serving it"); } Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1001312) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -285,7 +285,7 @@ } for (HMsg msg: msgs) { - LOG.info("Received " + msg); + LOG.info("Received " + msg + " from " + serverInfo.getServerName()); switch (msg.getType()) { case REGION_SPLIT: this.services.getAssignmentManager().handleSplitReport(serverInfo, @@ -527,8 +527,9 @@ throws NotServingRegionException { HRegionInterface hri = getServerConnection(server); if(hri == null) { - LOG.warn("Attempting to send CLOSE RPC to server " + server.getServerName() - + " failed because no RPC connection found to this server"); + LOG.warn("Attempting to send CLOSE RPC to server " + + server.getServerName() + " failed because no RPC connection found " + + "to this server"); return; } hri.closeRegion(region); Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1001312) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -60,8 +60,8 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKTableDisable; import org.apache.hadoop.hbase.zookeeper.ZKUtil; -import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData; import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; +import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData; import org.apache.hadoop.io.Writable; import org.apache.zookeeper.KeeperException; @@ -234,7 +234,7 @@ * yet). * @param data */ - private void handleRegion(RegionTransitionData data) { + private void handleRegion(final RegionTransitionData data) { synchronized(regionsInTransition) { // Verify this is a known server if (!serverManager.isServerOnline(data.getServerName()) && @@ -248,7 +248,7 @@ LOG.debug("Handling transition=" + data.getEventType() + ", server=" + data.getServerName() + ", region=" + prettyPrintedRegionName); RegionState regionState = regionsInTransition.get(encodedName); - switch(data.getEventType()) { + switch (data.getEventType()) { case M_ZK_REGION_OFFLINE: // Nothing to do. break; @@ -259,9 +259,9 @@ if (regionState == null || (!regionState.isPendingClose() && !regionState.isClosing())) { LOG.warn("Received CLOSING for region " + prettyPrintedRegionName + - " from server " + data.getServerName() + " but region was in " + - " the state " + regionState + " and not " + - "in expected PENDING_CLOSE or CLOSING states"); + " from server " + data.getServerName() + " but region was in " + + " the state " + regionState + " and not " + + "in expected PENDING_CLOSE or CLOSING states"); return; } // Transition to CLOSING (or update stamp if already CLOSING) @@ -422,12 +422,21 @@ * @param serverInfo */ public void regionOnline(HRegionInfo regionInfo, HServerInfo serverInfo) { - synchronized(regionsInTransition) { - regionsInTransition.remove(regionInfo.getEncodedName()); - regionsInTransition.notifyAll(); + synchronized (this.regionsInTransition) { + RegionState rs = + this.regionsInTransition.remove(regionInfo.getEncodedName()); + if (rs != null) { + this.regionsInTransition.notifyAll(); + LOG.warn("Asked online a region that was already in " + + "regionsInTransition: " + rs); + } } - synchronized(regions) { - regions.put(regionInfo, serverInfo); + synchronized (this.regions) { + // Add check + HServerInfo hsi = this.regions.get(regionInfo); + if (hsi != null) LOG.warn("Overwriting " + regionInfo.getEncodedName() + + " on " + hsi); + this.regions.put(regionInfo, serverInfo); addToServers(serverInfo, regionInfo); } } @@ -440,15 +449,20 @@ * @param regionInfo * @param serverInfo */ - public void regionOffline(HRegionInfo regionInfo) { - synchronized(regionsInTransition) { - regionsInTransition.remove(regionInfo.getEncodedName()); - regionsInTransition.notifyAll(); + public void regionOffline(final HRegionInfo regionInfo) { + synchronized(this.regionsInTransition) { + if (this.regionsInTransition.remove(regionInfo.getEncodedName()) != null) { + this.regionsInTransition.notifyAll(); + } } - synchronized(regions) { - HServerInfo serverInfo = regions.remove(regionInfo); - List serverRegions = servers.get(serverInfo); - serverRegions.remove(regionInfo); + synchronized(this.regions) { + HServerInfo serverInfo = this.regions.remove(regionInfo); + if (serverInfo != null) { + List serverRegions = this.servers.get(serverInfo); + serverRegions.remove(regionInfo); + } else { + LOG.warn("Asked offline a region that was not online: " + regionInfo); + } } } @@ -580,17 +594,17 @@ region.getRegionNameAsString() + " (offlining)"); // Check if this region is currently assigned if (!regions.containsKey(region)) { - LOG.debug("Attempted to unassign region " + region.getRegionNameAsString() + - " but it is not " + + LOG.debug("Attempted to unassign region " + + region.getRegionNameAsString() + " but it is not " + "currently assigned anywhere"); return; } String encodedName = region.getEncodedName(); // Grab the state of this region and synchronize on it RegionState state; - synchronized(regionsInTransition) { + synchronized (regionsInTransition) { state = regionsInTransition.get(encodedName); - if(state == null) { + if (state == null) { state = new RegionState(region, RegionState.State.PENDING_CLOSE); regionsInTransition.put(encodedName, state); } else { @@ -923,27 +937,29 @@ HRegionInfo regionInfo = regionState.getRegion(); LOG.info("Regions in transition timed out: " + regionState); // Expired! Do a retry. - switch(regionState.getState()) { + switch (regionState.getState()) { case OFFLINE: case CLOSED: LOG.info("Region has been OFFLINE or CLOSED for too long, " + - "reassigning " + regionInfo.getRegionNameAsString()); + "reassigning " + regionInfo.getRegionNameAsString()); assign(regionState.getRegion()); break; case PENDING_OPEN: case OPENING: LOG.info("Region has been PENDING_OPEN or OPENING for too " + - "long, reassigning " + regionInfo.getRegionNameAsString()); + "long, reassigning region=" + + regionInfo.getRegionNameAsString()); assign(regionState.getRegion()); break; case OPEN: LOG.warn("Long-running region in OPEN state? This should " + - "not happen"); + "not happen; region=" + regionInfo.getRegionNameAsString()); break; case PENDING_CLOSE: case CLOSING: LOG.info("Region has been PENDING_CLOSE or CLOSING for too " + - "long, resending close rpc"); + "long, running unassign again on region=" + + regionInfo.getRegionNameAsString()); unassign(regionInfo); break; } @@ -1017,63 +1033,11 @@ */ public void handleSplitReport(final HServerInfo hsi, final HRegionInfo parent, final HRegionInfo a, final HRegionInfo b) { - synchronized (this.regions) { - checkRegion(hsi, parent, true); - checkRegion(hsi, a, false); - checkRegion(hsi, b, false); - this.regions.put(a, hsi); - this.regions.put(b, hsi); - removeFromServers(hsi, parent, true); - removeFromServers(hsi, a, false); - removeFromServers(hsi, b, false); - addToServers(hsi, a); - addToServers(hsi, b); - } + regionOffline(parent); + regionOnline(a, hsi); + regionOnline(b, hsi); } - /* - * Caller must hold locks on this.regions Map. - * @param hsi - * @param hri - * @param expected True if we expect hri to be in this.regions. - */ - private void checkRegion(final HServerInfo hsi, final HRegionInfo hri, - final boolean expected) { - HServerInfo serverInfo = regions.remove(hri); - if (expected) { - if (serverInfo == null) { - LOG.info("Region not on a server: " + hri.getRegionNameAsString()); - } - } else { - if (serverInfo != null) { - LOG.warn("Region present on " + hsi + "; unexpected"); - } - } - } - - /* - * Caller must hold locks on servers Map. - * @param hsi - * @param hri - * @param expected - */ - private void removeFromServers(final HServerInfo hsi, final HRegionInfo hri, - final boolean expected) { - List serverRegions = this.servers.get(hsi); - boolean removed = serverRegions.remove(hri); - if (expected) { - if (!removed) { - LOG.warn(hri.getRegionNameAsString() + " not found on " + hsi + - "; unexpected"); - } - } else { - if (removed) { - LOG.warn(hri.getRegionNameAsString() + " found on " + hsi + - "; unexpected"); - } - } - } - /** * @return A clone of current assignments. Note, this is assignments only. * If a new server has come in and it has no regions, it will not be included Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1001312) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -572,9 +572,11 @@ synchronized (this.balancer) { // Only allow one balance run at at time. if (this.assignmentManager.isRegionsInTransition()) { - LOG.debug("Not running balancer because regions in transition: " + + LOG.debug("Not running balancer because " + + this.assignmentManager.getRegionsInTransition().size() + + " region(s) in transition: " + org.apache.commons.lang.StringUtils. - abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 64)); + abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256)); return false; } if (!this.serverManager.getDeadServers().isEmpty()) { Index: src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java (revision 1001312) +++ src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java (working copy) @@ -68,7 +68,7 @@ * {@link EventType#M2ZK_REGION_OFFLINE}. * * @param eventType type of event - * @param regionName name of region + * @param regionName name of region as per {@link HRegionInfo#getRegionName()} */ public RegionTransitionData(EventType eventType, byte [] regionName) { this(eventType, regionName, null); @@ -85,7 +85,7 @@ * and {@link EventType#RS2ZK_REGION_OPENED}. * * @param eventType type of event - * @param regionName name of region + * @param regionName name of region as per {@link HRegionInfo#getRegionName()} * @param serverName name of server setting data */ public RegionTransitionData(EventType eventType, byte [] regionName, @@ -117,7 +117,7 @@ * Gets the name of the region being transitioned. * *

Region name is required so this never returns null. - * @return region name + * @return region name, the result of a call to {@link HRegionInfo#getRegionName()} */ public byte [] getRegionName() { return regionName;