Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java	(revision 1145463)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java	(working copy)
@@ -314,7 +314,9 @@
    * @param region region to open
+   * @return false if the region is already online on this region server and
+   * no open was started; true if the open request was accepted
    * @throws IOException
    */
-  public void openRegion(final HRegionInfo region) throws IOException;
+  public boolean openRegion(final HRegionInfo region) throws IOException;
 
   /**
    * Opens the specified regions.
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java	(revision 1145463)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java	(working copy)
@@ -50,6 +50,7 @@
 import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.catalog.MetaReader;
 import org.apache.hadoop.hbase.catalog.RootLocationEditor;
@@ -59,6 +60,8 @@
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
 import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
+import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
+import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -1039,7 +1042,28 @@
         // Transition RegionState to PENDING_OPEN
         state.update(RegionState.State.PENDING_OPEN);
         // Send OPEN RPC. This can fail if the server on other end is is not up.
-        serverManager.sendRegionOpen(plan.getDestination(), state.getRegion());
+        boolean alreadyOnline = !serverManager.sendRegionOpen(
+            plan.getDestination(), state.getRegion());
+        if (alreadyOnline) {
+          // The region server reported the region as already online (for
+          // example while enabling a table whose regions were still deployed),
+          // so drop the OFFLINE node in ZK and the in-memory transition state.
+          try {
+            ZKAssign.deleteOfflineNode(master.getZooKeeper(), state.getRegion()
+                .getEncodedName());
+          } catch (KeeperException e) {
+            master.abort(
+                "Error deleting OFFLINED node in ZK for transition ZK node ("
+                    + state.getRegion().getEncodedName() + ")", e);
+          }
+          synchronized (this.regionsInTransition) {
+            this.regionsInTransition.remove(plan.getRegionInfo()
+                .getEncodedName());
+          }
+          synchronized (this.regions) {
+            this.regions.put(plan.getRegionInfo(), plan.getDestination());
+          }
+        }
         break;
       } catch (Throwable t) {
         LOG.warn("Failed assignment of " +
@@ -1248,6 +1272,22 @@
     } catch (Throwable t) {
       if (t instanceof RemoteException) {
         t = ((RemoteException)t).unwrapRemoteException();
+        if (t instanceof NotServingRegionException) {
+          if (checkIfRegionBelongsToDisabling(region)) {
+            // Remove from the regionsInTransition map
+            LOG.info("While trying to recover the table "
+                + region.getTableDesc().getNameAsString()
+                + " to DISABLED state the region " + region
+                + " was offlined but the table was in DISABLING state");
+            synchronized (this.regionsInTransition) {
+              this.regionsInTransition.remove(region.getEncodedName());
+            }
+            // Remove from the regions map
+            synchronized (this.regions) {
+              this.regions.remove(region);
+            }
+          }
+        }
       }
       LOG.info("Server " + server + " returned " + t + " for " +
         region.getEncodedName());
@@ -1516,14 +1556,19 @@
    * @return map of servers not online to their assigned regions, as stored
    *         in META
    * @throws IOException
+   * @throws KeeperException
    */
   private Map<HServerInfo,List<Pair<HRegionInfo,Result>>> rebuildUserRegions()
-  throws IOException {
+      throws IOException, KeeperException {
     // Region assignment from META
     List<Result> results = MetaReader.fullScanOfResults(catalogTracker);
     // Map of offline servers and their regions to be returned
     Map<HServerInfo,List<Pair<HRegionInfo,Result>>> offlineServers =
       new TreeMap<HServerInfo,List<Pair<HRegionInfo,Result>>>();
+    // store all the disabling state table names
+    Set<String> disablingTables = new HashSet<String>(1);
+    // store all the enabling state table names
+    Set<String> enablingTables = new HashSet<String>(1);
     // Iterate regions in META
     for (Result result : results) {
       Pair<HRegionInfo,HServerInfo> region =
@@ -1531,10 +1576,17 @@
       if (region == null) continue;
       HServerInfo regionLocation = region.getSecond();
       HRegionInfo regionInfo = region.getFirst();
+      String tableName = regionInfo.getTableDesc().getNameAsString();
       if (regionLocation == null) {
         // Region not being served, add to region map with no assignment
         // If this needs to be assigned out, it will also be in ZK as RIT
-        this.regions.put(regionInfo, null);
+        // add only if the table is neither in disabled nor in enabling state
+        if (!checkIfRegionBelongsToDisabled(regionInfo)
+            && !checkIfRegionBelongsToEnabling(regionInfo)) {
+          this.regions.put(regionInfo, null);
+        }
+        addTheTablesInPartialState(disablingTables, enablingTables,
+            regionInfo, tableName);
       } else if (!serverManager.isServerOnline(regionLocation.getServerName())) {
         // Region is located on a server that isn't online
         List<Pair<HRegionInfo,Result>> offlineRegions =
@@ -1546,14 +1598,122 @@
         offlineRegions.add(new Pair<HRegionInfo,Result>(regionInfo, result));
       } else {
         // Region is being served and on an active server
-        regions.put(regionInfo, regionLocation);
-        addToServers(regionLocation, regionInfo);
+        // add only if the table is neither in disabled nor in enabling state
+        if (!checkIfRegionBelongsToDisabled(regionInfo)
+            && !checkIfRegionBelongsToEnabling(regionInfo)) {
+          regions.put(regionInfo, regionLocation);
+          addToServers(regionLocation, regionInfo);
+        }
+        addTheTablesInPartialState(disablingTables, enablingTables,
+            regionInfo, tableName);
       }
     }
+    boolean isWatcherCreated = recoverTableInDisablingState(disablingTables);
+    recoverTableInEnablingState(enablingTables, isWatcherCreated);
     return offlineServers;
   }
 
+  /**
+   * Records the region's table in the matching set when the table is reported
+   * as DISABLING or ENABLING; tables in any other state are not recorded.
+   *
+   * @param disablingTables collects names of tables in DISABLING state
+   * @param enablingTables collects names of tables in ENABLING state
+   * @param regionInfo region whose table state is checked
+   * @param tableName name of the table the region belongs to
+   */
+  private void addTheTablesInPartialState(Set<String> disablingTables,
+      Set<String> enablingTables, HRegionInfo regionInfo, String tableName) {
+    if (checkIfRegionBelongsToDisabling(regionInfo)) {
+      disablingTables.add(tableName);
+    } else if (checkIfRegionBelongsToEnabling(regionInfo)) {
+      enablingTables.add(tableName);
+    }
+  }
+
   /**
+   * Recover the tables that were not fully moved to DISABLED state. These
+   * tables are in DISABLING state when the master restarted/switched.
+   *
+   * @param disablingTables names of the tables found in DISABLING state
+   * @return true if the assignment znode watch was requested here, which
+   * happens only when at least one table needs recovery
+   * @throws KeeperException
+   * @throws TableNotFoundException
+   * @throws IOException
+   */
+  private boolean recoverTableInDisablingState(Set<String> disablingTables)
+      throws KeeperException, TableNotFoundException, IOException {
+    if (disablingTables.isEmpty()) {
+      return false;
+    }
+    // Create a watcher on the zookeeper node
+    ZKUtil.listChildrenAndWatchForNewChildren(watcher,
+        watcher.assignmentZNode);
+    for (String tableName : disablingTables) {
+      // Recover by calling DisableTableHandler
+      LOG.info("The table " + tableName
+          + " is in DISABLING state. Hence recovering by moving the table"
+          + " to DISABLED state.");
+      // Bytes.toBytes rather than String.getBytes(): the latter depends on the
+      // platform default charset.
+      new DisableTableHandler(this.master, Bytes.toBytes(tableName),
+          catalogTracker, this).process();
+    }
+    return true;
+  }
+
+  /**
+   * Recover the tables that are not fully moved to ENABLED state. These tables
+   * are in ENABLING state when the master restarted/switched.
+   *
+   * @param enablingTables names of the tables found in ENABLING state
+   * @param isWatcherCreated true if the assignment znode watch was already
+   * requested by the caller, in which case it is not requested again
+   * @throws KeeperException
+   * @throws TableNotFoundException
+   * @throws IOException
+   */
+  private void recoverTableInEnablingState(Set<String> enablingTables,
+      boolean isWatcherCreated) throws KeeperException, TableNotFoundException,
+      IOException {
+    if (enablingTables.isEmpty()) {
+      return;
+    }
+    if (!isWatcherCreated) {
+      ZKUtil.listChildrenAndWatchForNewChildren(watcher,
+          watcher.assignmentZNode);
+    }
+    for (String tableName : enablingTables) {
+      // Recover by calling EnableTableHandler
+      LOG.info("The table " + tableName
+          + " is in ENABLING state. Hence recovering by moving the table"
+          + " to ENABLED state.");
+      // Bytes.toBytes avoids the platform default charset of getBytes().
+      new EnableTableHandler(this.master, Bytes.toBytes(tableName),
+          catalogTracker, this).process();
+    }
+  }
+
+  /** Whether getZKTable() reports the region's table as ENABLING. */
+  private boolean checkIfRegionBelongsToEnabling(HRegionInfo regionInfo) {
+    String tableName = regionInfo.getTableDesc().getNameAsString();
+    return getZKTable().isEnablingTable(tableName);
+  }
+
+  /** Whether getZKTable() reports the region's table as DISABLED. */
+  private boolean checkIfRegionBelongsToDisabled(HRegionInfo regionInfo) {
+    String tableName = regionInfo.getTableDesc().getNameAsString();
+    return getZKTable().isDisabledTable(tableName);
+  }
+
+  /** Whether getZKTable() reports the region's table as DISABLING. */
+  private boolean checkIfRegionBelongsToDisabling(HRegionInfo regionInfo) {
+    String tableName = regionInfo.getTableDesc().getNameAsString();
+    return getZKTable().isDisablingTable(tableName);
+  }
+
+  /**
    * Processes list of dead servers from result of META scan.
    * <p>
    * This is used as part of failover to handle RegionServers which failed
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java	(revision 1145463)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java	(working copy)
@@ -550,15 +550,21 @@
    * @param server server to open a region
    * @param region region to open
+   * @return false only if the region server reported the region as already
+   * online; true otherwise, including when no RPC connection was available
+   * and nothing was sent
    */
-  public void sendRegionOpen(HServerInfo server, HRegionInfo region)
+  public boolean sendRegionOpen(HServerInfo server, HRegionInfo region)
   throws IOException {
     HRegionInterface hri = getServerConnection(server);
     if (hri == null) {
       LOG.warn("Attempting to send OPEN RPC to server " + server.getServerName()
           + " failed because no RPC connection found to this server");
-      return;
+      // Nothing was sent, so this must not be reported as "already online":
+      // the caller reacts to false by deleting the region's OFFLINE node in ZK
+      // and recording the region as deployed on this server.
+      return true;
     }
-    hri.openRegion(region);
+    return hri.openRegion(region);
   }
 
   /**
Index: src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java	(revision 1145463)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java	(working copy)
@@ -78,13 +78,7 @@
       }
       final String encodedName = regionInfo.getEncodedName();
 
-      // Check that this region is not already online
       HRegion region = this.rsServices.getFromOnlineRegions(encodedName);
-      if (region != null) {
-        LOG.warn("Attempted open of " + name +
-          " but already online on this server");
-        return;
-      }
 
       // If fails, just return.  Someone stole the region from under us.
       // Calling transitionZookeeperOfflineToOpening initalizes this.version.
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java	(revision 1145463)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java	(working copy)
@@ -2102,8 +2102,15 @@
 
   @Override
   @QosPriority(priority=HIGH_QOS)
-  public void openRegion(HRegionInfo region)
+  public boolean openRegion(HRegionInfo region)
   throws IOException {
+    // A region that is already online here is not reopened: warn and answer
+    // false so the caller can tell this apart from a submitted open.
+    if (this.getFromOnlineRegions(region.getEncodedName()) != null) {
+      LOG.warn("Attempted open of " + region.getEncodedName()
+          + " but already online on this server");
+      return false;
+    }
     if (this.regionsInTransitionInRS.contains(region.getEncodedNameAsBytes())) {
       throw new RegionAlreadyInTransitionException("open", region.getEncodedName());
     }
@@ -2117,6 +2124,7 @@
     } else {
       this.service.submit(new OpenRegionHandler(this, this, region));
     }
+    return true;
   }
 
   @Override