From 16661dc0ee5a40c57e84917eae1bc8752ff0d49a Mon Sep 17 00:00:00 2001 From: Guangxu Cheng Date: Thu, 30 Nov 2017 23:44:01 +0800 Subject: [PATCH] HBASE-19326 Remove dead servers from rsgroup --- .../apache/hadoop/hbase/rsgroup/RSGroupAdmin.java | 9 ++ .../hadoop/hbase/rsgroup/RSGroupAdminClient.java | 21 ++++ .../hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java | 36 +++++++ .../hadoop/hbase/rsgroup/RSGroupAdminServer.java | 52 ++++++++++ .../hadoop/hbase/rsgroup/RSGroupInfoManager.java | 6 ++ .../hbase/rsgroup/RSGroupInfoManagerImpl.java | 26 +++++ hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto | 10 ++ .../apache/hadoop/hbase/rsgroup/TestRSGroups.java | 6 +- .../hadoop/hbase/rsgroup/TestRSGroupsBase.java | 112 ++++++++++++++++++++- .../hbase/rsgroup/VerifyingRSGroupAdminClient.java | 6 ++ .../hadoop/hbase/coprocessor/MasterObserver.java | 18 ++++ .../hadoop/hbase/master/MasterCoprocessorHost.java | 24 +++++ .../hbase/security/access/AccessController.java | 6 ++ hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb | 12 +++ hbase-shell/src/main/ruby/shell.rb | 1 + .../remove_decommissioned_servers_rsgroup.rb | 34 +++++++ 16 files changed, 375 insertions(+), 4 deletions(-) create mode 100644 hbase-shell/src/main/ruby/shell/commands/remove_decommissioned_servers_rsgroup.rb diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java index 5f38d398be..bb84feead1 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java @@ -88,4 +88,13 @@ public interface RSGroupAdmin { */ void moveServersAndTables(Set
servers, Set tables, String targetGroup) throws IOException; + + /** + * Remove decommissioned servers from rsgroup. + * A server may abort because of a hardware failure and have to be taken offline + * for repair, or servers may have to be moved out to join another cluster. + * In both cases the servers need to be removed from the rsgroup. + * @param servers set of servers to remove + */ + void removeDecommissionedServers(Set
servers) throws IOException; } diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java index 9949704555..d9f3ba4597 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveServers import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveServersRequest; import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveTablesRequest; import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RSGroupAdminService; +import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveDecommissionedServersRequest; import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveRSGroupRequest; import org.apache.hadoop.hbase.protobuf.generated.RSGroupProtos; @@ -205,4 +206,24 @@ class RSGroupAdminClient implements RSGroupAdmin { throw ProtobufUtil.handleRemoteException(e); } } + + @Override + public void removeDecommissionedServers(Set
servers) throws IOException { + Set hostPorts = Sets.newHashSet(); + for(Address el: servers) { + hostPorts.add(HBaseProtos.ServerName.newBuilder() + .setHostName(el.getHostname()) + .setPort(el.getPort()) + .build()); + } + RemoveDecommissionedServersRequest request = + RemoveDecommissionedServersRequest.newBuilder() + .addAllServers(hostPorts) + .build(); + try { + stub.removeDecommissionedServers(null, request); + } catch (ServiceException e) { + throw ProtobufUtil.handleRemoteException(e); + } + } } diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java index 6a3c1e5dae..1b6186cdab 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java @@ -21,8 +21,10 @@ package org.apache.hadoop.hbase.rsgroup; import java.io.IOException; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import com.google.protobuf.RpcCallback; import com.google.protobuf.RpcController; @@ -32,6 +34,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.CoprocessorEnvironment; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.SnapshotDescription; @@ -68,6 +71,8 @@ import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveServers import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveTablesRequest; import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveTablesResponse; import 
org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RSGroupAdminService; +import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveDecommissionedServersRequest; +import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveDecommissionedServersResponse; import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveRSGroupRequest; import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveRSGroupResponse; import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets; @@ -289,6 +294,26 @@ public class RSGroupAdminEndpoint implements MasterCoprocessor, MasterObserver { } done.run(builder.build()); } + + @Override + public void removeDecommissionedServers(RpcController controller, + RemoveDecommissionedServersRequest request, + RpcCallback done) { + RemoveDecommissionedServersResponse.Builder builder = + RemoveDecommissionedServersResponse.newBuilder(); + try { + Set
servers = Sets.newHashSet(); + for (HBaseProtos.ServerName el : request.getServersList()) { + servers.add(Address.fromParts(el.getHostName(), el.getPort())); + } + LOG.info(master.getClientIdAuditPrefix() + + " remove decommissioned servers from rsgroup: " + servers); + groupAdminServer.removeDecommissionedServers(servers); + } catch (IOException e) { + CoprocessorRpcUtils.setControllerException(controller, e); + } + done.run(builder.build()); + } } void assignTableToGroup(TableDescriptor desc) throws IOException { @@ -357,5 +382,16 @@ public class RSGroupAdminEndpoint implements MasterCoprocessor, MasterObserver { assignTableToGroup(desc); } + @Override + public void postClearDeadServers(ObserverContext ctx, + List servers, List notClearedServers) + throws IOException { + Set
clearedServer = servers.stream(). + filter(server -> !notClearedServers.contains(server)). + map(ServerName::getAddress). + collect(Collectors.toSet()); + groupAdminServer.removeDecommissionedServers(clearedServer); + } + ///////////////////////////////////////////////////////////////////////////// } diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java index 4a9a885897..6097b6e61d 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java @@ -583,6 +583,29 @@ public class RSGroupAdminServer implements RSGroupAdmin { + servers + " , Tables : " + tables + " => " + targetGroup); } + @Override + public void removeDecommissionedServers(Set
servers) throws IOException { + { + if (servers == null || servers.isEmpty()) { + throw new ConstraintException("The set of servers to remove cannot be null or empty."); + } + // Hold a lock on the manager instance while removing servers to prevent + // another writer changing our state while we are working. + synchronized (rsGroupInfoManager) { + if (master.getMasterCoprocessorHost() != null) { + master.getMasterCoprocessorHost().preRemoveDecommissionedServers(servers); + } + // reject the request if any server is online or on the dead servers list + checkForDeadOrOnlineServers(servers); + rsGroupInfoManager.removeDecommissionedServers(servers); + if (master.getMasterCoprocessorHost() != null) { + master.getMasterCoprocessorHost().postRemoveDecommissionedServers(servers); + } + LOG.info("Remove decommissioned servers " + servers + " from rsgroup done."); + } + } + } + private Map rsGroupGetRegionsInTransition(String groupName) throws IOException { Map rit = Maps.newTreeMap(); @@ -634,4 +657,33 @@ public class RSGroupAdminServer implements RSGroupAdmin { return result; } + + /** + * Check that none of the servers is on the online servers list or the dead servers list. + * @param servers servers to remove + */ + private void checkForDeadOrOnlineServers(Set
servers) throws ConstraintException { + // This ugliness is because we only have Address, not ServerName. + Set
onlineServers = new HashSet<>(); + for(ServerName server: master.getServerManager().getOnlineServers().keySet()) { + onlineServers.add(server.getAddress()); + } + + Set
deadServers = new HashSet<>(); + for(ServerName server: master.getServerManager().getDeadServers().copyServerNames()) { + deadServers.add(server.getAddress()); + } + + for (Address address: servers) { + if (onlineServers.contains(address)) { + throw new ConstraintException( + "Server " + address + " is an online server, not allowed to remove."); + } + if (deadServers.contains(address)) { + throw new ConstraintException( + "Server " + address + " is on the dead servers list," + + " Maybe it will come back again, not allowed to remove."); + } + } + } } diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java index 3fb40da48c..874c7a7d62 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java @@ -117,4 +117,10 @@ public interface RSGroupInfoManager { */ void moveServersAndTables(Set
servers, Set tables, String srcGroup, String dstGroup) throws IOException; + + /** + * Remove decommissioned servers from the rsgroup they belong to. + * @param servers set of servers to remove + */ + void removeDecommissionedServers(Set
servers) throws IOException; } diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index 5fca6595ea..c6aa2c83cd 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -309,6 +309,32 @@ class RSGroupInfoManagerImpl implements RSGroupInfoManager { flushConfig(newGroupMap); } + @Override + public synchronized void removeDecommissionedServers(Set
servers) throws IOException { + Map rsGroupInfos = new HashMap(); + for (Address el: servers) { + RSGroupInfo rsGroupInfo = getRSGroupOfServer(el); + if (rsGroupInfo != null) { + RSGroupInfo newRsGroupInfo = rsGroupInfos.get(rsGroupInfo.getName()); + if (newRsGroupInfo == null) { + rsGroupInfo.removeServer(el); + rsGroupInfos.put(rsGroupInfo.getName(), rsGroupInfo); + } else { + newRsGroupInfo.removeServer(el); + rsGroupInfos.put(newRsGroupInfo.getName(), newRsGroupInfo); + } + }else { + LOG.warn("Server " + el + " does not belong to any rsgroup."); + } + } + + if (rsGroupInfos.size() > 0) { + Map newGroupMap = Maps.newHashMap(rsGroupMap); + newGroupMap.putAll(rsGroupInfos); + flushConfig(newGroupMap); + } + } + List retrieveGroupListFromGroupTable() throws IOException { List rsGroupInfoList = Lists.newArrayList(); for (Result result : rsGroupTable.getScanner(new Scan())) { diff --git a/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto b/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto index 0213402826..2a51d6db1b 100644 --- a/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto +++ b/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto @@ -115,6 +115,13 @@ message MoveServersAndTablesRequest { message MoveServersAndTablesResponse { } +message RemoveDecommissionedServersRequest { + repeated ServerName servers = 1; +} + +message RemoveDecommissionedServersResponse { +} + service RSGroupAdminService { rpc GetRSGroupInfo(GetRSGroupInfoRequest) returns (GetRSGroupInfoResponse); @@ -145,4 +152,7 @@ service RSGroupAdminService { rpc MoveServersAndTables(MoveServersAndTablesRequest) returns (MoveServersAndTablesResponse); + + rpc RemoveDecommissionedServers(RemoveDecommissionedServersRequest) + returns (RemoveDecommissionedServersResponse); } diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java index fffdeb7308..9a58097ecb 100644 --- 
a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java @@ -74,10 +74,10 @@ public class TestRSGroups extends TestRSGroupsBase { RSGroupBasedLoadBalancer.class.getName()); TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, RSGroupAdminEndpoint.class.getName()); - TEST_UTIL.startMiniCluster(NUM_SLAVES_BASE); - TEST_UTIL.getConfiguration().set( + TEST_UTIL.startMiniCluster(NUM_SLAVES_BASE - 1); + TEST_UTIL.getConfiguration().setInt( ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, - ""+NUM_SLAVES_BASE); + NUM_SLAVES_BASE - 1); TEST_UTIL.getConfiguration().setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); admin = TEST_UTIL.getAdmin(); diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java index 6aa3acd1ee..5e7beae692 100644 --- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import com.google.common.collect.Lists; import java.io.IOException; import java.security.SecureRandom; import java.util.EnumSet; @@ -50,6 +51,7 @@ import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.constraint.ConstraintException; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Assert; @@ -62,6 +64,7 @@ import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps; import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets; 
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoRequest; public abstract class TestRSGroupsBase { protected static final Log LOG = LogFactory.getLog(TestRSGroupsBase.class); @@ -863,4 +866,111 @@ public abstract class TestRSGroupsBase { //verify that all region still assgin on targetServer Assert.assertEquals(5, getTableServerRegionMap().get(tableName).get(targetServer).size()); } -} + + @Test + public void testClearDeadServers() throws Exception { + LOG.info("testClearDeadServers"); + final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3); + + ServerName targetServer = ServerName.parseServerName( + newGroup.getServers().iterator().next().toString()); + AdminProtos.AdminService.BlockingInterface targetRS = + ((ClusterConnection) admin.getConnection()).getAdmin(targetServer); + try { + targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null, + GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName()); + //stopping may cause an exception + //due to the connection loss + targetRS.stopServer(null, + AdminProtos.StopServerRequest.newBuilder().setReason("Die").build()); + } catch(Exception e) { + } + HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + //wait for stopped regionserver to dead server list + TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + return !master.getServerManager().areDeadServersInProgress() + && cluster.getClusterStatus().getDeadServerNames().size() > 0; + } + }); + assertFalse(cluster.getClusterStatus().getServers().contains(targetServer)); + assertTrue(cluster.getClusterStatus().getDeadServerNames().contains(targetServer)); + assertTrue(newGroup.getServers().contains(targetServer.getAddress())); + + //clear dead servers list + List notClearedServers = 
admin.clearDeadServers(Lists.newArrayList(targetServer)); + assertEquals(0, notClearedServers.size()); + + Set
newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers(); + assertFalse(newGroupServers.contains(targetServer.getAddress())); + assertEquals(2, newGroupServers.size()); + } + + @Test + public void testRemoveServers() throws Exception { + LOG.info("testRemoveServers"); + final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3); + ServerName targetServer = ServerName.parseServerName( + newGroup.getServers().iterator().next().toString()); + try { + rsGroupAdmin.removeDecommissionedServers(Sets.newHashSet(targetServer.getAddress())); + fail("Online servers shouldn't have been successfully removed."); + } catch(IOException ex) { + String exp = "Server " + targetServer.getAddress() + + " is an online server, not allowed to remove."; + String msg = "Expected '" + exp + "' in exception message: "; + assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp)); + } + assertTrue(newGroup.getServers().contains(targetServer.getAddress())); + + AdminProtos.AdminService.BlockingInterface targetRS = + ((ClusterConnection) admin.getConnection()).getAdmin(targetServer); + try { + targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null, + GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName()); + //stopping may cause an exception + //due to the connection loss + targetRS.stopServer(null, + AdminProtos.StopServerRequest.newBuilder().setReason("Die").build()); + } catch(Exception e) { + } + + HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + //wait for stopped regionserver to dead server list + TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + return !master.getServerManager().areDeadServersInProgress() + && cluster.getClusterStatus().getDeadServerNames().size() > 0; + } + }); + + try { + rsGroupAdmin.removeDecommissionedServers(Sets.newHashSet(targetServer.getAddress())); + fail("Dead servers shouldn't have been 
successfully removed."); + } catch(IOException ex) { + String exp = "Server " + targetServer.getAddress() + " is on the dead servers list," + + " Maybe it will come back again, not allowed to remove."; + String msg = "Expected '" + exp + "' in exception message: "; + assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp)); + } + assertTrue(newGroup.getServers().contains(targetServer.getAddress())); + + ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster(); + TEST_UTIL.getHBaseClusterInterface().stopMaster(sn); + TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000); + TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0); + TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(60000); + + assertEquals(3, cluster.getClusterStatus().getServersSize()); + assertFalse(cluster.getClusterStatus().getServers().contains(targetServer)); + assertFalse(cluster.getClusterStatus().getDeadServerNames().contains(targetServer)); + assertTrue(newGroup.getServers().contains(targetServer.getAddress())); + + rsGroupAdmin.removeDecommissionedServers(Sets.newHashSet(targetServer.getAddress())); + Set
newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers(); + assertFalse(newGroupServers.contains(targetServer.getAddress())); + assertEquals(2, newGroupServers.size()); + } +} \ No newline at end of file diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java index ba3534de91..dcfcaa6295 100644 --- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java @@ -110,6 +110,12 @@ public class VerifyingRSGroupAdminClient implements RSGroupAdmin { verify(); } + @Override + public void removeDecommissionedServers(Set
servers) throws IOException { + wrapped.removeDecommissionedServers(servers); + verify(); + } + public void verify() throws IOException { Map groupMap = Maps.newHashMap(); Set zList = Sets.newHashSet(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java index 398e56bafe..40265febc0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java @@ -1099,6 +1099,24 @@ public interface MasterObserver { String groupName, boolean balancerRan) throws IOException {} /** + * Called before servers are removed from rsgroup + * @param ctx the environment to interact with the framework and master + * @param servers set of decommissioned servers to remove + */ + default void preRemoveDecommissionedServers( + final ObserverContext ctx, + Set
servers) throws IOException {} + + /** + * Called after decommissioned servers are removed from rsgroup + * @param ctx the environment to interact with the framework and master + * @param servers set of decommissioned servers that were asked to be removed + */ + default void postRemoveDecommissionedServers( + final ObserverContext ctx, + Set
servers) throws IOException {} + + /** * Called before add a replication peer * @param ctx the environment to interact with the framework and master * @param peerId a short name that identifies the peer diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java index 43373474c0..64b0db15c8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java @@ -1401,6 +1401,30 @@ public class MasterCoprocessorHost }); } + public void preRemoveDecommissionedServers(final Set
deadServers) + throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + if(((MasterEnvironment)getEnvironment()).supportGroupCPs) { + observer.preRemoveDecommissionedServers(this, deadServers); + } + } + }); + } + + public void postRemoveDecommissionedServers(final Set
deadServers) + throws IOException { + execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { + @Override + public void call(MasterObserver observer) throws IOException { + if(((MasterEnvironment)getEnvironment()).supportGroupCPs) { + observer.postRemoveDecommissionedServers(this, deadServers); + } + } + }); + } + public void preAddReplicationPeer(final String peerId, final ReplicationPeerConfig peerConfig) throws IOException { execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java index f2b75417cd..ed2cc0b82c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java @@ -2690,6 +2690,12 @@ public class AccessController implements MasterCoprocessor, RegionCoprocessor, } @Override + public void preRemoveDecommissionedServers(ObserverContext ctx, + Set
servers) throws IOException { + requirePermission(getActiveUser(ctx), "removeDecommissionedServers", Action.ADMIN); + } + + @Override public void preAddReplicationPeer(final ObserverContext ctx, String peerId, ReplicationPeerConfig peerConfig) throws IOException { requirePermission(getActiveUser(ctx), "addReplicationPeer", Action.ADMIN); diff --git a/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb b/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb index befed010ed..eadcd85964 100644 --- a/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb +++ b/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb @@ -118,5 +118,17 @@ module Hbase end @admin.moveServersAndTables(servers, tables, dest) end + + #-------------------------------------------------------------------------- + # remove decommissioned server from rsgroup + def remove_decommissioned_servers(*args) + # Flatten params array + args = args.flatten.compact + servers = java.util.HashSet.new + args.each do |s| + servers.add(org.apache.hadoop.hbase.net.Address.fromString(s)) + end + @admin.removeDecommissionedServers(servers) + end end end diff --git a/hbase-shell/src/main/ruby/shell.rb b/hbase-shell/src/main/ruby/shell.rb index 60ca229b93..017290bd7e 100644 --- a/hbase-shell/src/main/ruby/shell.rb +++ b/hbase-shell/src/main/ruby/shell.rb @@ -483,5 +483,6 @@ Shell.load_command_group( move_servers_tables_rsgroup get_server_rsgroup get_table_rsgroup + remove_decommissioned_servers_rsgroup ] ) diff --git a/hbase-shell/src/main/ruby/shell/commands/remove_decommissioned_servers_rsgroup.rb b/hbase-shell/src/main/ruby/shell/commands/remove_decommissioned_servers_rsgroup.rb new file mode 100644 index 0000000000..a6db29068e --- /dev/null +++ b/hbase-shell/src/main/ruby/shell/commands/remove_decommissioned_servers_rsgroup.rb @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class RemoveDecommissionedServersRsgroup < Command + def help + <<-EOF +Remove decommissioned regionServers from rsgroup. +Example: + hbase> remove_decommissioned_servers_rsgroup ['server1:port','server2:port'] +EOF + end + + def command(servers) + rsgroup_admin.remove_decommissioned_servers(servers) + end + end + end +end -- 2.13.0.windows.1