From 178b214234d34835a308d730a92cca4f0e60393d Mon Sep 17 00:00:00 2001 From: Umesh Agashe Date: Wed, 5 Sep 2018 14:13:45 -0700 Subject: [PATCH] HBASE-21023 Added bypassProcedureToCompletion() API to HbckService --- .../org/apache/hadoop/hbase/client/HBaseHbck.java | 28 +++++++++++++ .../java/org/apache/hadoop/hbase/client/Hbck.java | 22 ++++++++++ .../hbase/shaded/protobuf/RequestConverter.java | 7 ++++ .../src/main/protobuf/Master.proto | 14 +++++++ .../hadoop/hbase/master/MasterRpcServices.java | 25 +++++++++++ .../org/apache/hadoop/hbase/client/TestHbck.java | 48 ++++++++++++++++++++++ 6 files changed, 144 insertions(+) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java index 03a6f69c221819fa4dada4bb387c2b050272a406..3fc570ab1b16214af87f5dece820e52f61d11b6f 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java @@ -18,13 +18,16 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.util.concurrent.Callable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureToCompletionResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface; @@ -92,4 +95,29 @@ public class HBaseHbck implements Hbck { throw new IOException(se); } } + + /** + * NOTE: this 
is a dangerous operation and may be used to unstick buggy procedures. This may + * leave system in incoherent state. This may need to be followed by some cleanup steps/ + * actions by operator. + */ + @Override + public boolean bypassProcedureToCompletion(long procId, long waitTime, boolean force) + throws IOException { + BypassProcedureToCompletionResponse response = ProtobufUtil.call( + new Callable<BypassProcedureToCompletionResponse>() { + @Override + public BypassProcedureToCompletionResponse call() throws Exception { + try { + return hbck.bypassProcedureToCompletion(rpcControllerFactory.newController(), + RequestConverter.buildBypassProcedureToCompletionRequest(procId, + waitTime, force)); + } catch (Throwable t) { + LOG.error("Failed to bypass procedure with procId={}", procId, t); + throw t; + } + } + }); + return response.getValue(); + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java index a216cdbc6b7d17b17f88ba0ff628cabd7bae6983..cb9f197ee8c6799f8db2420d4c3ec7bdfa88df91 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java @@ -47,4 +47,26 @@ public interface Hbck extends Abortable, Closeable { * @return previous state of the table in Meta */ TableState setTableStateInMeta(TableState state) throws IOException; + + /** + * Bypass specified procedure to completion. Procedure is marked completed but + * no actual work is done from the current state/ step onwards. Parents of the procedure are + * also marked for bypass. + * + * NOTE: this is a dangerous operation and may be used to unstick buggy procedures. This may + * leave system in incoherent state. This may need to be followed by some cleanup steps/ + * actions by operator. 
+ * + * @param procId id of the procedure to bypass to completion + * @param waitTime wait time in ms for acquiring lock for a procedure + * @param force if force set to true, we will bypass the procedure even if it is executing. + * This is for procedures which can't break out during executing(due to bug, mostly) + * In this case, bypassing the procedure is not enough, since it is already stuck + * there. We need to restart the master after bypassing, and letting the problematic + * procedure to execute with bypass=true, so in that condition, the procedure can be + * successfully bypassed. + * + * @return true if procedure is marked for bypass successfully, false otherwise + */ + boolean bypassProcedureToCompletion(long procId, long waitTime, boolean force) throws IOException; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index 41abf97696f51c7b1611e2d76ecb700b4e240f42..3a8ea27a162f4258a51e57b54d970d1b101027e5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -99,6 +99,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AddColumnRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AssignRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BalanceRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureToCompletionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClearDeadServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceRequest; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableRequest; @@ -1888,4 +1889,10 @@ public final class RequestConverter { return TransitReplicationPeerSyncReplicationStateRequest.newBuilder().setPeerId(peerId) .setSyncReplicationState(ReplicationPeerConfigUtil.toSyncReplicationState(state)).build(); } + + public static BypassProcedureToCompletionRequest + buildBypassProcedureToCompletionRequest(long procId, long waitTime, boolean force) { + return BypassProcedureToCompletionRequest.newBuilder().setProcId(procId) + .setWaitTime(waitTime).setForce(force).build(); + } } diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index 69e0f32d07e4223d7dc924b8b5c7b7449346a765..e15799b45ff17842b4be07c1558dccb58d89008b 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -490,6 +490,16 @@ message SetTableStateInMetaRequest { required TableState table_state = 2; } +message BypassProcedureToCompletionRequest { + required uint64 proc_id = 1; + optional uint64 waitTime = 2; // wait time in ms to acquire lock on a procedure + optional bool force = 3; // if true, procedure is marked for bypass even if it is executing +} + +message BypassProcedureToCompletionResponse { + required bool value = 1; +} + message GetClusterStatusRequest { repeated Option options = 1; } @@ -1001,4 +1011,8 @@ service HbckService { /** Update state of the table in meta only*/ rpc SetTableStateInMeta(SetTableStateInMetaRequest) returns(GetTableStateResponse); + + /** Bypass a procedure to completion, procedure is completed but no actual work is done*/ + rpc BypassProcedureToCompletion(BypassProcedureToCompletionRequest) + returns(BypassProcedureToCompletionResponse); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 
8aadb98ed0e561e96310d8938db52dd2cfa02135..792c02f8a64e7b304b8ada1099fd605d15e78fcf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -134,6 +134,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AssignRegi import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AssignRegionResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BalanceRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BalanceResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureToCompletionRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureToCompletionResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClearDeadServersRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClearDeadServersResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceRequest; @@ -2341,4 +2343,27 @@ public class MasterRpcServices extends RSRpcServices throw new ServiceException(e); } } + + /** + * Bypass specified procedure to completion. Procedure is marked completed but no actual work + * is done from the current state/ step onwards. Parents of the procedure are also marked for + * bypass. + * + * NOTE: this is a dangerous operation and may be used to unstick buggy procedures. This may + * leave system in incoherent state. This may need to be followed by some cleanup steps/ + * actions by operator. 
+ * + * @return BypassProcedureToCompletionResponse indicating success or failure + */ + @Override + public BypassProcedureToCompletionResponse bypassProcedureToCompletion(RpcController controller, + BypassProcedureToCompletionRequest request) throws ServiceException { + try { + boolean ret = master.getMasterProcedureExecutor().bypassProcedure(request.getProcId(), + request.getWaitTime(), request.getForce()); + return BypassProcedureToCompletionResponse.newBuilder().setValue(ret).build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java index 86652d84e42b56837652f117c6705a34844e7fd5..5a62919fbdbfa14317ccdde43cd39ea9089a47f2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java @@ -25,6 +25,12 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.junit.After; @@ -55,6 +61,8 @@ public class TestHbck { private Admin admin; private Hbck hbck; + private static ProcedureExecutor<MasterProcedureEnv> procExec; + @Rule public TestName name = new TestName(); @@ -69,6 +77,8 @@ public class TestHbck { TEST_UTIL.startMiniCluster(3); 
TEST_UTIL.createTable(tableName, "family1"); + + procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(); } @AfterClass @@ -101,4 +111,42 @@ public class TestHbck { assertTrue("Incorrect previous state! expeced=DISABLED, found=" + prevState.getState(), prevState.isDisabled()); } + + public static class SuspendProcedure extends + ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface { + public SuspendProcedure() { + super(); + } + + @Override + protected Procedure[] execute(final MasterProcedureEnv env) + throws ProcedureSuspendedException { + // Always suspend the procedure + throw new ProcedureSuspendedException(); + } + + @Override + public TableName getTableName() { + return tableName; + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.READ; + } + } + + @Test + public void testBypassProcedureToCompletion() throws Exception { + // SuspendProcedure + final SuspendProcedure proc = new SuspendProcedure(); + long procId = procExec.submitProcedure(proc); + Thread.sleep(500); + + //bypass the procedure + assertTrue("Failed to by pass procedure!", + hbck.bypassProcedureToCompletion(procId, 30000, false)); + TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); + LOG.info("{} finished", proc); + } } -- 2.16.1