NOTE(review): This patch was recovered from a copy whose line breaks and
indentation had been collapsed. The record structure (one diff header, hunk
header, context, '+' or '-' record per line) has been restored and every hunk's
old/new line counts match its '@@' header. Leading indentation inside the Java
lines was reconstructed from brace nesting and is an assumption - apply with
whitespace-insensitive matching, or confirm against the target tree. Markup
inside Javadoc context lines may also have been lost in the collapsed copy
(for example, the final context line of the ServerManager.java hunk is a bare
" *") - TODO confirm against the target file.

NOTE(review): The HMaster.java hunks add imports of ProtobufUtil and
HBaseFsckRepair, but no HMaster.java hunk in this patch references either
name; the new call goes through ServerManager.closeRegionSilentlyAndWait.
Confirm whether those two imports are needed.

Summary of the change: the close-and-wait loop is moved out of
HBaseFsckRepair.closeRegionSilentlyAndWait into a new static
ServerManager.closeRegionSilentlyAndWait that takes the timeout as a parameter.
HBaseFsckRepair now delegates to it (still reading "hbase.hbck.close.timeout"),
and HMaster uses it with a 30000 ms timeout when closing an excess meta replica
instead of serverManager.sendRegionClose.

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 809b311..e488186 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -109,6 +109,7 @@ import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
@@ -124,6 +125,7 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CompressionTest;
 import org.apache.hadoop.hbase.util.EncryptionTest;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HBaseFsckRepair;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.Pair;
@@ -799,7 +801,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
         int replicaId = zooKeeper.getMetaReplicaIdFromZnode(metaReplicaZnode);
         if (replicaId >= numMetaReplicasConfigured) {
           RegionState r = MetaTableLocator.getMetaRegionState(zkw, replicaId);
-          serverManager.sendRegionClose(r.getServerName(), r.getRegion());
+          LOG.info("Closing excess replica of meta region " + r.getRegion());
+          // send a close and wait for a max of 30 seconds
+          ServerManager.closeRegionSilentlyAndWait(getConnection(), r.getServerName(),
+              r.getRegion(), 30000);
           ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(replicaId));
         }
       }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index c42f314..69c29fd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -39,6 +39,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ClockOutOfSyncException;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.RegionLoad;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerLoad;
@@ -768,6 +769,35 @@ public class ServerManager {
   }
 
   /**
+   * Contacts a region server and waits up to timeout ms
+   * to close the region. This bypasses the active hmaster.
+   */
+  public static void closeRegionSilentlyAndWait(ClusterConnection connection,
+      ServerName server, HRegionInfo region, long timeout) throws IOException, InterruptedException {
+    AdminService.BlockingInterface rs = connection.getAdmin(server);
+    try {
+      ProtobufUtil.closeRegion(rs, server, region.getRegionName());
+    } catch (IOException e) {
+      LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
+    }
+    long expiration = timeout + System.currentTimeMillis();
+    while (System.currentTimeMillis() < expiration) {
+      try {
+        HRegionInfo rsRegion =
+          ProtobufUtil.getRegionInfo(rs, region.getRegionName());
+        if (rsRegion == null) return;
+      } catch (IOException ioe) {
+        if (ioe instanceof NotServingRegionException) // no need to retry again
+          return;
+        LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(), ioe);
+      }
+      Thread.sleep(1000);
+    }
+    throw new IOException("Region " + region + " failed to close within"
+        + " timeout " + timeout);
+  }
+
+  /**
    * Sends an MERGE REGIONS RPC to the specified server to merge the specified
    * regions.
    *
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
index 8175454..3c63e8a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsckRepair.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.HConnection;
@@ -38,8 +39,7 @@ import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
+import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 
 import org.apache.zookeeper.KeeperException;
@@ -153,29 +153,10 @@ public class HBaseFsckRepair {
   @SuppressWarnings("deprecation")
   public static void closeRegionSilentlyAndWait(HConnection connection,
       ServerName server, HRegionInfo region) throws IOException, InterruptedException {
-    AdminService.BlockingInterface rs = connection.getAdmin(server);
-    try {
-      ProtobufUtil.closeRegion(rs, server, region.getRegionName());
-    } catch (IOException e) {
-      LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);
-    }
     long timeout = connection.getConfiguration()
       .getLong("hbase.hbck.close.timeout", 120000);
-    long expiration = timeout + System.currentTimeMillis();
-    while (System.currentTimeMillis() < expiration) {
-      try {
-        HRegionInfo rsRegion =
-          ProtobufUtil.getRegionInfo(rs, region.getRegionName());
-        if (rsRegion == null) return;
-      } catch (IOException ioe) {
-        if (ioe instanceof NotServingRegionException) // no need to retry again
-          return;
-        LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(), ioe);
-      }
-      Thread.sleep(1000);
-    }
-    throw new IOException("Region " + region + " failed to close within"
-        + " timeout " + timeout);
+    ServerManager.closeRegionSilentlyAndWait((ClusterConnection)connection, server,
+        region, timeout);
   }
 
   /**