From b3f88d32f07457e15b09d490e4ef55fd3a3cf87f Mon Sep 17 00:00:00 2001 From: Umesh Agashe Date: Wed, 10 May 2017 11:41:59 -0700 Subject: [PATCH] HBASE-18018 Changes to support abort for all procedures by default The default behavior for abort() method of StateMachineProcedure is changed to support aborting all procedures irrespective of if rollback is supported or not. Currently its observed that sometimes procedures may fail on a step which will be considered as retryable error as abort is not supported. As a result procedure may stuck in a endless loop repeating same step again.User should have an option to abort any stuck procedure and do clean up manually. Please refer to HBASE-18016 and discussion there. --- .../hbase/procedure2/StateMachineProcedure.java | 37 ++++++++++++++-------- .../procedure2/TestStateMachineProcedure.java | 20 +++++++++++- .../master/procedure/DeleteTableProcedure.java | 9 ++++++ 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java index ea2a41fe1f6c23fd5d4aa10e0589fa8c168f3f27..0590a9352bd805a28b5b7ee9931be1ac6c2de6f4 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java @@ -105,14 +105,8 @@ public abstract class StateMachineProcedure * @param state the state enum object */ protected void setNextState(final TState state) { - if (aborted.get() && isRollbackSupported(getCurrentState())) { - setAbortFailure(getClass().getSimpleName(), "abort requested"); - } else { - if (aborted.get()) { - LOG.warn("ignoring abort request " + state); - } - setNextState(getStateId(state)); - } + setNextState(getStateId(state)); + failIfAborted(); } /** @@ -147,6 +141,8 @@ public abstract class StateMachineProcedure throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { updateTimestamp(); try { + failIfAborted(); + if (!hasMoreState() || isFailed()) return null; TState state = getCurrentState(); @@ -190,10 +186,11 @@ public abstract class StateMachineProcedure protected boolean abort(final TEnvironment env) { final boolean isDebugEnabled = LOG.isDebugEnabled(); final TState state = getCurrentState(); - if (isRollbackSupported(state)) { - if (isDebugEnabled) { - LOG.debug("abort requested for " + this + " state=" + state); - } + if (isDebugEnabled) { + LOG.debug("abort requested for " + this + " state=" + state); + } + + if (hasMoreState()) { aborted.set(true); return true; } else if (isDebugEnabled) { @@ -203,6 +200,20 @@ public abstract class StateMachineProcedure } /** + * If procedure has more states then abort it otherwise procedure is finished and abort can be + * ignored. + */ + protected final void failIfAborted() { + if (aborted.get()) { + if (hasMoreState()) { + setAbortFailure(getClass().getSimpleName(), "abort requested"); + } else { + LOG.warn("Ignoring abort request on state='" + getCurrentState() + "' for " + this); + } + } + } + + /** * Used by the default implementation of abort() to know if the current state can be aborted * and rollback can be triggered. */ @@ -219,7 +230,7 @@ public abstract class StateMachineProcedure return stateFlow != Flow.NO_MORE_STATE; } - private TState getCurrentState() { + protected TState getCurrentState() { return stateCount > 0 ? getState(states[stateCount-1]) : getInitialState(); } diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java index 82b767e13cd91a258e8f15c1f097f32ea2d4fade..8347dbf90b619e1c93240bbb16d159f3c2d2a740 100644 --- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java +++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java @@ -93,6 +93,21 @@ public class TestStateMachineProcedure { } @Test + public void testAbortStuckProcedure() throws InterruptedException { + try { + procExecutor.getEnvironment().loop = true; + TestSMProcedure proc = new TestSMProcedure(); + long procId = procExecutor.submitProcedure(proc); + Thread.sleep(1000 + (int) (Math.random() * 4001)); + proc.abort(procExecutor.getEnvironment()); + ProcedureTestingUtility.waitProcedure(procExecutor, procId); + assertEquals(true, proc.isFailed()); + } finally { + procExecutor.getEnvironment().loop = false; + } + } + + @Test public void testChildOnLastStep() { long procId = procExecutor.submitProcedure(new TestSMProcedure()); ProcedureTestingUtility.waitProcedure(procExecutor, procId); @@ -142,7 +157,9 @@ public class TestStateMachineProcedure { env.execCount.incrementAndGet(); switch (state) { case STEP_1: - setNextState(TestSMProcedureState.STEP_2); + if (!env.loop) { + setNextState(TestSMProcedureState.STEP_2); + } break; case STEP_2: addChildProcedure(new SimpleChildProcedure()); @@ -190,5 +207,6 @@ public class TestStateMachineProcedure { AtomicInteger execCount = new AtomicInteger(0); AtomicInteger rollbackCount = new AtomicInteger(0); boolean triggerChildRollback = false; + boolean loop = false; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java index 9d0a283c6a8bb56d8571ced79818ed8eadf5648a..bda68eb9fe484084c53df01a1d3263cf5de9fb8a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java @@ -146,6 +146,15 @@ public class DeleteTableProcedure } @Override + protected boolean abort(MasterProcedureEnv env) { + // TODO: Current behavior is: with no rollback and no abort support, procedure may stuck + // looping in retrying failing step forever. Default behavior of abort is changed to support + // aborting all procedures. Override the default wisely. Following code retains the current + // behavior. Revisit it later. + return isRollbackSupported(getCurrentState()) ? super.abort(env) : false; + } + + @Override protected void rollbackState(final MasterProcedureEnv env, final DeleteTableState state) { if (state == DeleteTableState.DELETE_TABLE_PRE_OPERATION) { // nothing to rollback, pre-delete is just table-state checks. -- 2.10.1 (Apple Git-78)