Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1299561) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -214,6 +214,12 @@ protected final Map onlineRegions = new ConcurrentHashMap(); + /** + * Set of regions currently being split on this region server. Element is + * the encoded region name. All access should be synchronized. + */ + private final Set splittingRegions = new HashSet(); + protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); final int numRetries; @@ -2768,6 +2774,13 @@ ". Version of ZK closing node:" + versionOfClosingNode); boolean hasit = this.onlineRegions.containsKey(region.getEncodedName()); if (!hasit) { + synchronized (splittingRegions) { + if (splittingRegions.contains(region.getEncodedName())) { + LOG.info("Received close for region we are splitting; " + + region.getEncodedName()); + return false; + } + } LOG.warn("Received close for region we are not serving; " + region.getEncodedName()); throw new NotServingRegionException("Received close for " @@ -3027,6 +3040,20 @@ return toReturn != null; } + @Override + public void addToSplittingRegions(String encodedRegionName) { + synchronized (splittingRegions) { + splittingRegions.add(encodedRegionName); + } + } + + @Override + public boolean removeFromSplittingRegions(String encodedRegionName) { + synchronized (splittingRegions) { + return splittingRegions.remove(encodedRegionName); + } + } + /** * @return A new Map of online regions sorted by region size with the first * entry being the biggest. 
@@ -3715,4 +3742,5 @@ LOG.info("Registered RegionServer MXBean"); } + } Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1299561) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -1988,34 +1988,19 @@ region.getRegionNameAsString()); return; } - // This never happens. Currently regionserver close always return true. + // Currently regionserver close will return false if the region is + // splitting. LOG.warn("Server " + server + " region CLOSE RPC returned false for " + region.getRegionNameAsString()); } catch (NotServingRegionException nsre) { LOG.info("Server " + server + " returned " + nsre + " for " + region.getRegionNameAsString()); - // Presume that master has stale data. Presume remote side just split. - // Presume that the split message when it comes in will fix up the master's - // in memory cluster state. 
+ cancelClosingRegionIfDisabling(region); } catch (Throwable t) { if (t instanceof RemoteException) { t = ((RemoteException)t).unwrapRemoteException(); if (t instanceof NotServingRegionException) { - if (checkIfRegionBelongsToDisabling(region)) { - // Remove from the regionsinTransition map - LOG.info("While trying to recover the table " - + region.getTableNameAsString() - + " to DISABLED state the region " + region - + " was offlined but the table was in DISABLING state"); - synchronized (this.regionsInTransition) { - this.regionsInTransition.remove(region.getEncodedName()); - } - // Remove from the regionsMap - synchronized (this.regions) { - this.regions.remove(region); - } - deleteClosingOrClosedNode(region); - } + cancelClosingRegionIfDisabling(region); } // RS is already processing this region, only need to update the timestamp if (t instanceof RegionAlreadyInTransitionException) { @@ -2028,7 +2013,29 @@ // Presume retry or server will expire. } } - + + /** + * Cancel closing region if it is disabling. 
Currently it is only called after + * region CLOSE RPC returned {@link NotServingRegionException} + * @param region + */ + private void cancelClosingRegionIfDisabling(HRegionInfo region) { + if (checkIfRegionBelongsToDisabling(region)) { + // Remove from the regionsinTransition map + LOG.info("While trying to recover the table " + + region.getTableNameAsString() + " to DISABLED state the region " + + region + " was offlined but the table was in DISABLING state"); + synchronized (this.regionsInTransition) { + this.regionsInTransition.remove(region.getEncodedName()); + } + // Remove from the regionsMap + synchronized (this.regions) { + this.regions.remove(region); + } + deleteClosingOrClosedNode(region); + } + } + private void deleteClosingOrClosedNode(HRegionInfo region) { try { if (!ZKAssign.deleteNode(master.getZooKeeper(), region.getEncodedName(), Index: src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (revision 1299561) +++ src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (working copy) @@ -274,6 +274,7 @@ } if (!testing) { + services.addToSplittingRegions(this.parent.getRegionInfo().getEncodedName()); services.removeFromOnlineRegions(this.parent.getRegionInfo().getEncodedName()); } this.journal.add(JournalEntry.OFFLINED_PARENT); @@ -452,6 +453,8 @@ PairOfSameType regions = createDaughters(server, services); openDaughters(server, services, regions.getFirst(), regions.getSecond()); transitionZKNode(server, regions.getFirst(), regions.getSecond()); + //Split transaction is completed, remove parent region from SplittingRegions. 
+ services.removeFromSplittingRegions(parent.getRegionInfo().getEncodedName()); return regions; } @@ -771,7 +774,11 @@ break; case OFFLINED_PARENT: - if (services != null) services.addToOnlineRegions(this.parent); + if (services != null) { + services.addToOnlineRegions(this.parent); + services.removeFromSplittingRegions(this.parent.getRegionInfo() + .getEncodedName()); + } break; case PONR: Index: src/main/java/org/apache/hadoop/hbase/regionserver/OnlineRegions.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/OnlineRegions.java (revision 1299561) +++ src/main/java/org/apache/hadoop/hbase/regionserver/OnlineRegions.java (working copy) @@ -38,6 +38,12 @@ public void addToOnlineRegions(final HRegion r); /** + * Add to splitting regions. + * @param encodedRegionName encoded name of the region to add to the splitting set + */ + public void addToSplittingRegions(String encodedRegionName); + + /** * This method removes HRegion corresponding to hri from the Map of onlineRegions. * * @param encodedRegionName @@ -46,6 +52,12 @@ public boolean removeFromOnlineRegions(String encodedRegionName); /** + * This method removes region from the set of splittingRegions. + * @return True if we removed a region from splitting region list. + */ + public boolean removeFromSplittingRegions(String encodedRegionName); + + /** * Return {@link HRegion} instance. * Only works if caller is in same context, in same JVM. HRegion is not * serializable. 
Index: src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (revision 1299561) +++ src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (working copy) @@ -159,4 +159,16 @@ public void setFileSystem(FileSystem hfs) { this.hfs = (HFileSystem)hfs; } + + @Override + public void addToSplittingRegions(String encodedRegionName) { + // TODO Auto-generated method stub + + } + + @Override + public boolean removeFromSplittingRegions(String encodedRegionName) { + // TODO Auto-generated method stub + return false; + } }