From e1c67bad6ba3e4c6992ea3338d5cc7728f69e3d1 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 12 Sep 2018 21:28:52 -0700 Subject: [PATCH] HBASE-21156 [hbck2] Queue an assign of hbase:meta and bulk assign/unassign Adds 'raw' assigns and unassigns methods to Hbck Service. --- .../org/apache/hadoop/hbase/client/HBaseAdmin.java | 1 - .../org/apache/hadoop/hbase/client/HBaseHbck.java | 49 +++++++++-- .../java/org/apache/hadoop/hbase/client/Hbck.java | 38 +++++++-- .../hbase/shaded/protobuf/RequestConverter.java | 22 +++++ .../src/main/protobuf/Master.proto | 61 +++++++++++++- .../hadoop/hbase/master/MasterRpcServices.java | 97 ++++++++++++++++++++++ .../hbase/master/assignment/AssignmentManager.java | 5 ++ .../org/apache/hadoop/hbase/client/TestHbck.java | 86 +++++++++++++------ 8 files changed, 315 insertions(+), 44 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index 7f3abea9a3..a160204f18 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -226,7 +226,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; * @see Admin */ @InterfaceAudience.Private -@InterfaceStability.Evolving public class HBaseAdmin implements Admin { private static final Logger LOG = LoggerFactory.getLogger(HBaseAdmin.class); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java index 03a6f69c22..392708db01 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java @@ -18,8 +18,12 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + 
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,12 +35,16 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckServic /** * Use {@link ClusterConnection#getHbck()} to obtain an instance of {@link Hbck} instead of - * constructing - * an HBaseHbck directly. This will be mostly used by hbck tool. + * constructing an HBaseHbck directly. * *

Connection should be an unmanaged connection obtained via * {@link ConnectionFactory#createConnection(Configuration)}.

* + *

NOTE: The methods in here can do damage to a cluster if applied in the wrong sequence or at + * the wrong time. Use with caution. For experts only. These methods are only for the + * extreme case where the cluster has been damaged or has achieved an inconsistent state because + * of some unforeseen circumstance or bug and requires manual intervention. + * *

An instance of this class is lightweight and not-thread safe. A new instance should be created * by each thread. Pooling or caching of the instance is not recommended.

* @@ -75,10 +83,6 @@ public class HBaseHbck implements Hbck { return this.aborted; } - /** - * NOTE: This is a dangerous action, as existing running procedures for the table or regions - * which belong to the table may get confused. - */ @Override public TableState setTableStateInMeta(TableState state) throws IOException { try { @@ -87,9 +91,38 @@ public class HBaseHbck implements Hbck { RequestConverter.buildSetTableStateInMetaRequest(state)); return TableState.convert(state.getTableName(), response.getTableState()); } catch (ServiceException se) { - LOG.debug("ServiceException while updating table state in meta. table={}, state={}", - state.getTableName(), state.getState()); + LOG.debug("table={}, state={}", state.getTableName(), state.getState(), se); + throw new IOException(se); + } + } + + @Override + public List assigns(List encodedRegionNames) throws IOException { + try { + MasterProtos.AssignsResponse response = + this.hbck.assigns(rpcControllerFactory.newController(), + RequestConverter.toAssignRegionsRequest(encodedRegionNames)); + return response.getPidList(); + } catch (ServiceException se) { + LOG.debug(toCommaDelimitedString(encodedRegionNames), se); throw new IOException(se); } } + + @Override + public List unassigns(List encodedRegionNames) throws IOException { + try { + MasterProtos.UnassignsResponse response = + this.hbck.unassigns(rpcControllerFactory.newController(), + RequestConverter.toUnassignRegionsRequest(encodedRegionNames)); + return response.getPidList(); + } catch (ServiceException se) { + LOG.debug(toCommaDelimitedString(encodedRegionNames), se); + throw new IOException(se); + } + } + + private static String toCommaDelimitedString(List list) { + return list.stream().collect(Collectors.joining(", ")); + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java index a216cdbc6b..c6a3caab61 100644 --- 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hbase.client; import java.io.Closeable; import java.io.IOException; +import java.util.List; + import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.yetus.audience.InterfaceAudience; @@ -27,7 +29,12 @@ import org.apache.yetus.audience.InterfaceAudience; * Hbck APIs for HBase. Obtain an instance from {@link ClusterConnection#getHbck()} and call * {@link #close()} when done. *

Hbck client APIs will be mostly used by hbck tool which in turn can be used by operators to - * fix HBase and bringging it to consistent state.

+ * fix HBase and bringing it to consistent state.

+ * + *

NOTE: The methods in here can do damage to a cluster if applied in the wrong sequence or at + * the wrong time. Use with caution. For experts only. These methods are only for the + * extreme case where the cluster has been damaged or has achieved an inconsistent state because + * of some unforeseen circumstance or bug and requires manual intervention. * * @see ConnectionFactory * @see ClusterConnection @@ -36,15 +43,36 @@ import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.HBCK) public interface Hbck extends Abortable, Closeable { /** - * Update table state in Meta only. No procedures are submitted to open/ assign or close/ - * unassign regions of the table. This is useful only when some procedures/ actions are stuck - * beause of inconsistency between region and table states. + * Update table state in Meta only. No procedures are submitted to open/assign or + * close/unassign regions of the table. * - * NOTE: This is a dangerous action, as existing running procedures for the table or regions + *

NOTE: This is a dangerous action, as existing running procedures for the table or regions * which belong to the table may get confused. * * @param state table state * @return previous state of the table in Meta */ TableState setTableStateInMeta(TableState state) throws IOException; + + /** + * Like {@link Admin#assign(byte[])} but 'raw' in that it can do more than one Region at a time + * -- good if many Regions to online -- and it will schedule the assigns even in the case where + * Master is initializing (as long as the ProcedureExecutor is up). Does NOT call Coprocessor + * hooks. + * @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding + * for hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an + * example of what a random user-space encoded Region name looks like. + */ + List assigns(List encodedRegionNames) throws IOException; + + /** + * Like {@link Admin#unassign(byte[], boolean)} but 'raw' in that it can do more than one Region + * at a time -- good if many Regions to offline -- and it will schedule the unassigns even in the + * case where Master is initializing (as long as the ProcedureExecutor is up). Does NOT call + * Coprocessor hooks. + * @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding + * for hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an + * example of what a random user-space encoded Region name looks like. 
+ */ + List unassigns(List encodedRegionNames) throws IOException; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index 64da3a383c..6ab81e22f2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.apache.hadoop.hbase.CellScannable; import org.apache.hadoop.hbase.ClusterMetrics.Option; @@ -1879,4 +1880,25 @@ public final class RequestConverter { } return pbServers; } + + // HBCK2 + public static MasterProtos.AssignsRequest toAssignRegionsRequest( + List encodedRegionNames) { + MasterProtos.AssignsRequest.Builder b = MasterProtos.AssignsRequest.newBuilder(); + return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).build(); + } + + public static MasterProtos.UnassignsRequest toUnassignRegionsRequest( + List encodedRegionNames) { + MasterProtos.UnassignsRequest.Builder b = + MasterProtos.UnassignsRequest.newBuilder(); + return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).build(); + } + + private static List toEncodedRegionNameRegionSpecifiers( + List encodedRegionNames) { + return encodedRegionNames.stream(). + map(r -> buildRegionSpecifier(RegionSpecifierType.ENCODED_REGION_NAME, Bytes.toBytes(r))). 
+ collect(Collectors.toList()); + } } diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index ca8d915705..8cef398aff 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -485,10 +485,6 @@ message GetTableStateResponse { required TableState table_state = 1; } -message SetTableStateInMetaRequest { - required TableName table_name = 1; - required TableState table_state = 2; -} message GetClusterStatusRequest { repeated Option options = 1; @@ -993,8 +989,65 @@ service MasterService { } +// HBCK Service definitions. + +message SetTableStateInMetaRequest { + required TableName table_name = 1; + required TableState table_state = 2; +} + +/** Like Admin's AssignRegionRequest except it can + * take one or more Regions at a time. + */ +// NOTE: In hbck.proto, there is a define for +// AssignRegionRequest -- singular 'Region'. This +// is plural to convey it can carry more than one +// Region at a time. +message AssignsRequest { + repeated RegionSpecifier region = 1; +} + +/** Like Admin's AssignRegionResponse except it can + * return one or more pids as result -- one per assign. + */ +message AssignsResponse { + repeated uint64 pid = 1; +} + +/** Like Admin's UnassignRegionRequest except it can + * take one or more Regions at a time. + */ +message UnassignsRequest { + repeated RegionSpecifier region = 1; +} + +/** Like Admin's UnassignRegionResponse except it can + * return one or more pids as result -- one per unassign. + */ +message UnassignsResponse { + repeated uint64 pid = 1; +} + service HbckService { /** Update state of the table in meta only*/ rpc SetTableStateInMeta(SetTableStateInMetaRequest) returns(GetTableStateResponse); + + /** + * Assign regions. + * Like Admin's assign but works even if the + * Master is initializing. Also allows bulk'ing up + * assigns rather than one region at a time. 
+ */ + rpc Assigns(AssignsRequest) + returns(AssignsResponse); + + /** + * Unassign regions + * Like Admin's unassign but works even if the + * Master is initializing. Also allows bulk'ing up + * unassigns rather than one region at a time. + */ + rpc Unassigns(UnassignsRequest) + returns(UnassignsResponse); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index a12608ac69..0b18d112fe 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -29,11 +29,13 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetricsBuilder; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.Server; @@ -63,7 +65,9 @@ import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface; import org.apache.hadoop.hbase.ipc.RpcServerFactory; import org.apache.hadoop.hbase.ipc.RpcServerInterface; import org.apache.hadoop.hbase.ipc.ServerRpcController; +import org.apache.hadoop.hbase.master.assignment.AssignProcedure; import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.master.assignment.UnassignProcedure; import org.apache.hadoop.hbase.master.locking.LockProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable; @@ -2307,4 +2311,97 @@ public class 
MasterRpcServices extends RSRpcServices throw new ServiceException(e); } } + + /** + * Get RegionInfo from Master using content of RegionSpecifier as key. + * @return RegionInfo found by decoding rs or null if none found + */ + private RegionInfo getRegionInfo(HBaseProtos.RegionSpecifier rs) throws UnknownRegionException { + RegionInfo ri = null; + switch(rs.getType()) { + case REGION_NAME: + final byte[] regionName = rs.getValue().toByteArray(); + ri = this.master.getAssignmentManager().getRegionInfo(regionName); + break; + case ENCODED_REGION_NAME: + String encodedRegionName = Bytes.toString(rs.getValue().toByteArray()); + RegionState regionState = this.master.getAssignmentManager().getRegionStates(). + getRegionState(encodedRegionName); + ri = regionState == null? null: regionState.getRegion(); + break; + default: + break; + } + return ri; + } + + /** + * Submit the Procedure that gets created by f + * @return pid of the submitted Procedure. + */ + private long submitProcedure(HBaseProtos.RegionSpecifier rs, Function f) + throws UnknownRegionException { + RegionInfo ri = getRegionInfo(rs); + long pid = Procedure.NO_PROC_ID; + if (ri == null) { + LOG.warn("No RegionInfo found to match {}", rs); + } else { + pid = this.master.getMasterProcedureExecutor().submitProcedure(f.apply(ri)); + } + return pid; + } + + /** + * A 'raw' version of assign that does bulk and skirts Master state checks (assigns can be made + * during Master startup). For use by Hbck2. 
+ * @see #assignRegion(RpcController, AssignRegionRequest) + */ + @Override + public MasterProtos.AssignsResponse assigns(RpcController controller, + MasterProtos.AssignsRequest request) + throws ServiceException { + LOG.info(master.getClientIdAuditPrefix() + " assigns"); + if (this.master.getMasterProcedureExecutor() == null) { + throw new ServiceException("Master's ProcedureExecutor not initialized; retry later"); + } + MasterProtos.AssignsResponse.Builder responseBuilder = + MasterProtos.AssignsResponse.newBuilder(); + try { + for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) { + long pid = submitProcedure(rs, + r -> this.master.getAssignmentManager().createAssignProcedure(r)); + responseBuilder.addPid(pid); + } + return responseBuilder.build(); + } catch (IOException ioe) { + throw new ServiceException(ioe); + } + } + + /** + * A 'raw' version of unassign that does bulk and skirts Master state checks (unassigns can be + * made during Master startup). For use by Hbck2. + * @see #unassignRegion(RpcController, UnassignRegionRequest) + */ + @Override + public MasterProtos.UnassignsResponse unassigns(RpcController controller, + MasterProtos.UnassignsRequest request) + throws ServiceException { + LOG.info(master.getClientIdAuditPrefix() + " unassigns"); + if (this.master.getMasterProcedureExecutor() == null) { + throw new ServiceException("Master's ProcedureExecutor not initialized; retry later"); + } + MasterProtos.UnassignsResponse.Builder responseBuilder = + MasterProtos.UnassignsResponse.newBuilder(); + try { + for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) { + long pid = submitProcedure(rs, + ri -> this.master.getAssignmentManager().createUnassignProcedure(ri)); + responseBuilder.addPid(pid); + } + return responseBuilder.build(); + } catch (IOException ioe) { + throw new ServiceException(ioe); + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index ce33e5204c..759fe540c6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -694,6 +694,11 @@ public class AssignmentManager implements ServerListener { return proc; } + public UnassignProcedure createUnassignProcedure(final RegionInfo regionInfo) { + return createUnassignProcedure(regionInfo, null, false); + + } + UnassignProcedure createUnassignProcedure(final RegionInfo regionInfo, final ServerName destinationServer, final boolean force) { return createUnassignProcedure(regionInfo, destinationServer, force, false); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java index 86652d84e4..f290b9bab3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java @@ -19,14 +19,22 @@ package org.apache.hadoop.hbase.client; import static junit.framework.TestCase.assertTrue; +import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.procedure.Procedure; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Threads; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ 
-52,23 +60,16 @@ public class TestHbck { private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); - private Admin admin; - private Hbck hbck; @Rule public TestName name = new TestName(); - private static final TableName tableName = TableName.valueOf(TestHbck.class.getSimpleName()); + private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName()); @BeforeClass public static void setUpBeforeClass() throws Exception { - TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100); - TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250); - TEST_UTIL.getConfiguration().setInt("hbase.client.retries.number", 6); - TEST_UTIL.getConfiguration().setBoolean("hbase.master.enabletable.roundrobin", true); TEST_UTIL.startMiniCluster(3); - - TEST_UTIL.createTable(tableName, "family1"); + TEST_UTIL.createMultiRegionTable(TABLE_NAME, Bytes.toBytes("family1"), 5); } @AfterClass @@ -76,29 +77,62 @@ public class TestHbck { TEST_UTIL.shutdownMiniCluster(); } - @Before - public void setUp() throws Exception { - this.admin = TEST_UTIL.getAdmin(); - this.hbck = TEST_UTIL.getHbck(); - } - - @After - public void tearDown() throws Exception { - for (HTableDescriptor htd : this.admin.listTables()) { - TEST_UTIL.deleteTable(htd.getTableName()); - } - this.hbck.close(); - } - @Test public void testSetTableStateInMeta() throws IOException { + Hbck hbck = TEST_UTIL.getHbck(); // set table state to DISABLED - hbck.setTableStateInMeta(new TableState(tableName, TableState.State.DISABLED)); + hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED)); // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case // will be DISABLED TableState prevState = - hbck.setTableStateInMeta(new TableState(tableName, TableState.State.ENABLED)); + hbck.setTableStateInMeta(new TableState(TABLE_NAME, 
TableState.State.ENABLED)); assertTrue("Incorrect previous state! expeced=DISABLED, found=" + prevState.getState(), prevState.isDisabled()); } + + @Test + public void testAssigns() throws IOException { + Hbck hbck = TEST_UTIL.getHbck(); + try (Admin admin = TEST_UTIL.getConnection().getAdmin()) { + List regions = admin.getRegions(TABLE_NAME); + for (RegionInfo ri: regions) { + RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(). + getRegionStates().getRegionState(ri.getEncodedName()); + LOG.info("RS: {}", rs.toString()); + } + List pids = hbck.unassigns(regions.stream().map(r -> r.getEncodedName()). + collect(Collectors.toList())); + waitOnPids(pids); + for (RegionInfo ri: regions) { + RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(). + getRegionStates().getRegionState(ri.getEncodedName()); + LOG.info("RS: {}", rs.toString()); + assertTrue(rs.toString(), rs.isClosed()); + } + pids = hbck.assigns(regions.stream().map(r -> r.getEncodedName()). + collect(Collectors.toList())); + waitOnPids(pids); + for (RegionInfo ri: regions) { + RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(). + getRegionStates().getRegionState(ri.getEncodedName()); + LOG.info("RS: {}", rs.toString()); + assertTrue(rs.toString(), rs.isOpened()); + } + // What happens if crappy region list passed? + pids = hbck.assigns(Arrays.stream(new String [] {"a", "some rubbish name"}). + collect(Collectors.toList())); + for (long pid: pids) { + assertEquals(org.apache.hadoop.hbase.procedure2.Procedure.NO_PROC_ID, pid); + } + } + } + + private void waitOnPids(List pids) { + for (Long pid: pids) { + while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(). + isFinished(pid)) { + Threads.sleep(100); + } + } + } } -- 2.16.3