From 0ab91edc7e0f18ce71d6337db36b3b16d6f879be Mon Sep 17 00:00:00 2001 From: Umesh Agashe Date: Wed, 5 Sep 2018 14:13:45 -0700 Subject: [PATCH] HBASE-21023 Added bypassProcedure() API to HbckService --- .../org/apache/hadoop/hbase/client/HBaseHbck.java | 31 +++++++++++-- .../java/org/apache/hadoop/hbase/client/Hbck.java | 29 +++++++----- .../hadoop/hbase/procedure2/ProcedureExecutor.java | 18 ++++++-- .../src/main/protobuf/Master.proto | 14 ++++++ .../hadoop/hbase/master/MasterRpcServices.java | 26 +++++++++++ .../org/apache/hadoop/hbase/client/TestHbck.java | 52 ++++++++++++++++++++++ 6 files changed, 152 insertions(+), 18 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java index 03a6f69c22..b5e90bca72 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java @@ -18,12 +18,18 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface; @@ -75,10 +81,6 @@ public class HBaseHbck implements Hbck { return this.aborted; } - /** - * NOTE: This is a dangerous action, as existing running procedures for the table or regions - * which belong to the table may get confused. - */ @Override public TableState setTableStateInMeta(TableState state) throws IOException { try { @@ -92,4 +94,25 @@ public class HBaseHbck implements Hbck { throw new IOException(se); } } + + @Override + public List bypassProcedure(List pids, long waitTime, boolean force) + throws IOException { + MasterProtos.BypassProcedureResponse response = ProtobufUtil.call( + new Callable() { + @Override + public MasterProtos.BypassProcedureResponse call() throws Exception { + try { + return hbck.bypassProcedure(rpcControllerFactory.newController(), + MasterProtos.BypassProcedureRequest.newBuilder().addAllProcId(pids). + setWaitTime(waitTime).setForce(force).build()); + } catch (Throwable t) { + LOG.error(pids.stream().map(i -> i.toString()). + collect(Collectors.joining(", ")), t); + throw t; + } + } + }); + return response.getValueList(); + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java index a216cdbc6b..aae2b8519c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java @@ -19,15 +19,16 @@ package org.apache.hadoop.hbase.client; import java.io.Closeable; import java.io.IOException; +import java.util.List; + import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.yetus.audience.InterfaceAudience; /** - * Hbck APIs for HBase. Obtain an instance from {@link ClusterConnection#getHbck()} and call + * Hbck fixup tool APIs. Obtain an instance from {@link ClusterConnection#getHbck()} and call * {@link #close()} when done. - *

Hbck client APIs will be mostly used by hbck tool which in turn can be used by operators to - * fix HBase and bringging it to consistent state.

+ *

WARNING: the below methods can damage the cluster. For experienced users only. * * @see ConnectionFactory * @see ClusterConnection @@ -36,15 +37,23 @@ import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.HBCK) public interface Hbck extends Abortable, Closeable { /** - * Update table state in Meta only. No procedures are submitted to open/ assign or close/ - * unassign regions of the table. This is useful only when some procedures/ actions are stuck - * beause of inconsistency between region and table states. - * - * NOTE: This is a dangerous action, as existing running procedures for the table or regions - * which belong to the table may get confused. - * + * Update table state in Meta only. No procedures are submitted to open/assign or + * close/unassign regions of the table. * @param state table state * @return previous state of the table in Meta */ TableState setTableStateInMeta(TableState state) throws IOException; + + /** + * Bypass specified procedure and move it to completion. Procedure is marked completed but + * no actual work is done from the current state/step onwards. Parents of the procedure are + * also marked for bypass. + * + * @param pids of procedures to complete. + * @param waitTime wait time in ms for acquiring lock for a procedure + * @param force if force set to true, we will bypass the procedure even if it is executing. + * This is for procedures which can't break out during execution (bugs?). + * @return true if procedure is marked for bypass successfully, false otherwise + */ + List bypassProcedure(List pids, long waitTime, boolean force) throws IOException; } diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java index 2a2f1123db..8cdeda36d0 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java @@ -37,6 +37,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; + +import com.sun.org.apache.xpath.internal.operations.Bool; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException; @@ -997,15 +999,23 @@ public class ProcedureExecutor { * @return true if bypass success * @throws IOException IOException */ - public boolean bypassProcedure(long id, long lockWait, boolean force) throws IOException { - Procedure procedure = getProcedure(id); + public List bypassProcedure(List pids, long lockWait, boolean force) + throws IOException { + List result = new ArrayList(pids.size()); + for (int i = 0; i < pids.size(); i++) { + result.set(i, bypassProcedure(pids.get(i), lockWait, force)); + } + return result; + } + + boolean bypassProcedure(long pid, long lockWait, boolean force) throws IOException { + Procedure procedure = getProcedure(pid); if (procedure == null) { - LOG.debug("Procedure with id={} does not exist, skipping bypass", id); + LOG.debug("Procedure with id={} does not exist, skipping bypass", pid); return false; } LOG.debug("Begin bypass {} with lockWait={}, force={}", procedure, lockWait, force); - IdLock.Entry lockEntry = procExecutionLock.tryLockEntry(procedure.getProcId(), lockWait); if (lockEntry == null && !force) { LOG.debug("Waited {} ms, but {} is still running, skipping bypass with force={}", diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index ca8d915705..cab7a85437 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -490,6 +490,16 @@ message SetTableStateInMetaRequest { required TableState table_state = 2; } +message BypassProcedureRequest { + repeated uint64 proc_id = 1; + optional uint64 waitTime = 2; // wait time in ms to acquire lock on a procedure + optional bool force = 3; // if true, procedure is marked for bypass even if its executing +} + +message BypassProcedureResponse { + repeated bool value = 1; +} + message GetClusterStatusRequest { repeated Option options = 1; } @@ -997,4 +1007,8 @@ service HbckService { /** Update state of the table in meta only*/ rpc SetTableStateInMeta(SetTableStateInMetaRequest) returns(GetTableStateResponse); + + /** Bypass a procedure to completion, procedure is completed but no actual work is done*/ + rpc BypassProcedure(BypassProcedureRequest) + returns(BypassProcedureResponse); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index a315027038..8b4a81935e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -30,6 +30,8 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.stream.Collectors; + +import com.sun.org.apache.xpath.internal.operations.Bool; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetricsBuilder; import org.apache.hadoop.hbase.DoNotRetryIOException; @@ -2306,4 +2308,28 @@ public class MasterRpcServices extends RSRpcServices throw new ServiceException(e); } } + + /** + * Bypass specified procedure to completion. Procedure is marked completed but no actual work + * is done from the current state/ step onwards. Parents of the procedure are also marked for + * bypass. + * + * NOTE: this is a dangerous operation and may be used to unstuck buggy procedures. This may + * leave system in inconherent state. This may need to be followed by some cleanup steps/ + * actions by operator. + * + * @return BypassProcedureToCompletionResponse indicating success or failure + */ + @Override + public MasterProtos.BypassProcedureResponse bypassProcedure(RpcController controller, + MasterProtos.BypassProcedureRequest request) throws ServiceException { + try { + List ret = + master.getMasterProcedureExecutor().bypassProcedure(request.getProcIdList(), + request.getWaitTime(), request.getForce()); + return MasterProtos.BypassProcedureResponse.newBuilder().build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java index 86652d84e4..e41030f3fd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java @@ -21,10 +21,19 @@ package org.apache.hadoop.hbase.client; import static junit.framework.TestCase.assertTrue; import java.io.IOException; +import java.util.Arrays; +import java.util.List; + import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.junit.After; @@ -55,6 +64,8 @@ public class TestHbck { private Admin admin; private Hbck hbck; + private static ProcedureExecutor procExec; + @Rule public TestName name = new TestName(); @@ -69,6 +80,8 @@ public class TestHbck { TEST_UTIL.startMiniCluster(3); TEST_UTIL.createTable(tableName, "family1"); + + procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(); } @AfterClass @@ -101,4 +114,43 @@ public class TestHbck { assertTrue("Incorrect previous state! expeced=DISABLED, found=" + prevState.getState(), prevState.isDisabled()); } + + public static class SuspendProcedure extends + ProcedureTestingUtility.NoopProcedure implements TableProcedureInterface { + public SuspendProcedure() { + super(); + } + + @Override + protected Procedure[] execute(final MasterProcedureEnv env) + throws ProcedureSuspendedException { + // Always suspend the procedure + throw new ProcedureSuspendedException(); + } + + @Override + public TableName getTableName() { + return tableName; + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.READ; + } + } + + @Test + public void testBypassProcedureToCompletion() throws Exception { + // SuspendProcedure + final SuspendProcedure proc = new SuspendProcedure(); + long procId = procExec.submitProcedure(proc); + Thread.sleep(500); + + //bypass the procedure + List pids = Arrays.asList(procId); + List results = this.hbck.bypassProcedure(pids, 30000, false); + assertTrue("Failed to by pass procedure!", results.get(0)); + TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); + LOG.info("{} finished", proc); + } } -- 2.16.3