From 08f671bc99eb9eb39cc82d2065ffa05cdf14a282 Mon Sep 17 00:00:00 2001 From: Umesh Agashe Date: Wed, 10 May 2017 11:41:59 -0700 Subject: [PATCH] HBASE-18018 Changes to support abort for all procedures by default The default behavior for abort() method of StateMachineProcedure is changed to support aborting all procedures irrespective of if rollback is supported or not. Currently its observed that sometimes procedures may fail on a step which will be considered as retryable error as abort is not supported. As a result procedure may stuck in a endless loop repeating same step again.User should have an option to abort any stuck procedure and do clean up manually. Please refer to HBASE-18016 and discussion there. --- .../hbase/procedure2/StateMachineProcedure.java | 35 ++++++++++++++-------- .../procedure2/TestStateMachineProcedure.java | 20 ++++++++++++- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java index ea2a41fe1f6c23fd5d4aa10e0589fa8c168f3f27..fa1c54c43ec55fe60c89820a09ff527ad67044cb 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java @@ -105,14 +105,8 @@ public abstract class StateMachineProcedure * @param state the state enum object */ protected void setNextState(final TState state) { - if (aborted.get() && isRollbackSupported(getCurrentState())) { - setAbortFailure(getClass().getSimpleName(), "abort requested"); - } else { - if (aborted.get()) { - LOG.warn("ignoring abort request " + state); - } - setNextState(getStateId(state)); - } + setNextState(getStateId(state)); + failIfAborted(); } /** @@ -147,6 +141,8 @@ public abstract class StateMachineProcedure throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { updateTimestamp(); try { + failIfAborted(); + if (!hasMoreState() || isFailed()) return null; TState state = getCurrentState(); @@ -190,10 +186,11 @@ public abstract class StateMachineProcedure protected boolean abort(final TEnvironment env) { final boolean isDebugEnabled = LOG.isDebugEnabled(); final TState state = getCurrentState(); - if (isRollbackSupported(state)) { - if (isDebugEnabled) { - LOG.debug("abort requested for " + this + " state=" + state); - } + if (isDebugEnabled) { + LOG.debug("abort requested for " + this + " state=" + state); + } + + if (hasMoreState()) { aborted.set(true); return true; } else if (isDebugEnabled) { @@ -203,6 +200,20 @@ public abstract class StateMachineProcedure } /** + * If procedure has more states then abort it otherwise procedure is finished and abort can be + * ignored. + */ + protected final void failIfAborted() { + if (aborted.get()) { + if (hasMoreState()) { + setAbortFailure(getClass().getSimpleName(), "abort requested"); + } else { + LOG.warn("Ignoring abort request on state='" + getCurrentState() + "' for " + this); + } + } + } + + /** * Used by the default implementation of abort() to know if the current state can be aborted * and rollback can be triggered. */ diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java index 82b767e13cd91a258e8f15c1f097f32ea2d4fade..8347dbf90b619e1c93240bbb16d159f3c2d2a740 100644 --- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java +++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java @@ -93,6 +93,21 @@ public class TestStateMachineProcedure { } @Test + public void testAbortStuckProcedure() throws InterruptedException { + try { + procExecutor.getEnvironment().loop = true; + TestSMProcedure proc = new TestSMProcedure(); + long procId = procExecutor.submitProcedure(proc); + Thread.sleep(1000 + (int) (Math.random() * 4001)); + proc.abort(procExecutor.getEnvironment()); + ProcedureTestingUtility.waitProcedure(procExecutor, procId); + assertEquals(true, proc.isFailed()); + } finally { + procExecutor.getEnvironment().loop = false; + } + } + + @Test public void testChildOnLastStep() { long procId = procExecutor.submitProcedure(new TestSMProcedure()); ProcedureTestingUtility.waitProcedure(procExecutor, procId); @@ -142,7 +157,9 @@ public class TestStateMachineProcedure { env.execCount.incrementAndGet(); switch (state) { case STEP_1: - setNextState(TestSMProcedureState.STEP_2); + if (!env.loop) { + setNextState(TestSMProcedureState.STEP_2); + } break; case STEP_2: addChildProcedure(new SimpleChildProcedure()); @@ -190,5 +207,6 @@ public class TestStateMachineProcedure { AtomicInteger execCount = new AtomicInteger(0); AtomicInteger rollbackCount = new AtomicInteger(0); boolean triggerChildRollback = false; + boolean loop = false; } } -- 2.10.1 (Apple Git-78)