From c05340f721eb71f48870da4ce65b38f4041bf50e Mon Sep 17 00:00:00 2001
From: zhangduo
Date: Wed, 24 Oct 2018 23:12:17 +0800
Subject: [PATCH] HBASE-21375 UT to reproduce the problem

---
 .../hbase/procedure2/ProcedureExecutor.java   |   5 +
 .../procedure2/ProcedureTestingUtility.java   |   2 +-
 .../procedure/TestSchedulerQueueDeadLock.java | 225 ++++++++++++++++++
 3 files changed, 231 insertions(+), 1 deletion(-)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSchedulerQueueDeadLock.java

diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
index 438b495365..fd82a4d1ca 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
@@ -2042,6 +2042,11 @@ public class ProcedureExecutor<TEnvironment> {
     return scheduler;
   }
 
+  @VisibleForTesting
+  public IdLock getProcExecutionLock() {
+    return procExecutionLock;
+  }
+
   // ==========================================================================
   // Worker Thread
   // ==========================================================================
diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
index 95e032043f..5a7a664c00 100644
--- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
+++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/ProcedureTestingUtility.java
@@ -403,7 +403,7 @@ public class ProcedureTestingUtility {
     public NoopProcedure() {}
 
     @Override
-    protected Procedure[] execute(TEnv env)
+    protected Procedure<TEnv>[] execute(TEnv env)
         throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
       return null;
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSchedulerQueueDeadLock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSchedulerQueueDeadLock.java
new file mode 100644
index 0000000000..6d3e126fe0
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSchedulerQueueDeadLock.java
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import java.util.concurrent.CountDownLatch;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.NoopProcedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.IdLock;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+/**
+ * Unit test for HBASE-21375. A procedure taking the table shared lock is run first but is given
+ * the greater procedure id, so that after the executor is restarted it is loaded after the
+ * procedure that wants the table exclusive lock. Both procedures must still be able to finish.
+ */
+@Category({ MasterTests.class, LargeTests.class })
+public class TestSchedulerQueueDeadLock {
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static final TableName TABLE_NAME = TableName.valueOf("deadlock");
+
+  /** Minimal procedure environment that only hands the scheduler to the test procedures. */
+  private static final class TestEnv {
+    private final MasterProcedureScheduler scheduler;
+
+    public TestEnv(MasterProcedureScheduler scheduler) {
+      this.scheduler = scheduler;
+    }
+
+    public MasterProcedureScheduler getScheduler() {
+      return scheduler;
+    }
+  }
+
+  /**
+   * Takes the shared lock on {@link #TABLE_NAME}, holds it, and blocks in {@code execute} until
+   * its latch is counted down. {@code setProcId} is overridden so that its id is always 2.
+   */
+  public static final class TableSharedProcedure extends NoopProcedure<TestEnv>
+      implements TableProcedureInterface {
+
+    private final CountDownLatch latch = new CountDownLatch(1);
+
+    @Override
+    protected Procedure<TestEnv>[] execute(TestEnv env)
+        throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
+      latch.await();
+      return null;
+    }
+
+    @Override
+    protected LockState acquireLock(TestEnv env) {
+      if (env.getScheduler().waitTableSharedLock(this, getTableName())) {
+        return LockState.LOCK_EVENT_WAIT;
+      }
+      return LockState.LOCK_ACQUIRED;
+    }
+
+    @Override
+    protected void releaseLock(TestEnv env) {
+      env.getScheduler().wakeTableSharedLock(this, getTableName());
+    }
+
+    @Override
+    protected boolean holdLock(TestEnv env) {
+      return true;
+    }
+
+    @Override
+    public TableName getTableName() {
+      return TABLE_NAME;
+    }
+
+    @Override
+    public TableOperationType getTableOperationType() {
+      return TableOperationType.READ;
+    }
+
+    @Override
+    protected void setProcId(long procId) {
+      // this is a hack to make this procedure be loaded after the procedure below, as the
+      // procedures are sorted by id when loading.
+      super.setProcId(2L);
+    }
+  }
+
+  /**
+   * Takes the exclusive lock on {@link #TABLE_NAME}, holds it, and blocks in {@code execute} until
+   * its latch is counted down. {@code setProcId} is overridden so that its id is always 1.
+   */
+  public static final class TableExclusiveProcedure extends NoopProcedure<TestEnv>
+      implements TableProcedureInterface {
+
+    private final CountDownLatch latch = new CountDownLatch(1);
+
+    @Override
+    protected Procedure<TestEnv>[] execute(TestEnv env)
+        throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
+      latch.await();
+      return null;
+    }
+
+    @Override
+    protected LockState acquireLock(TestEnv env) {
+      if (env.getScheduler().waitTableExclusiveLock(this, getTableName())) {
+        return LockState.LOCK_EVENT_WAIT;
+      }
+      return LockState.LOCK_ACQUIRED;
+    }
+
+    @Override
+    protected void releaseLock(TestEnv env) {
+      env.getScheduler().wakeTableExclusiveLock(this, getTableName());
+    }
+
+    @Override
+    protected boolean holdLock(TestEnv env) {
+      return true;
+    }
+
+    @Override
+    public TableName getTableName() {
+      return TABLE_NAME;
+    }
+
+    @Override
+    public TableOperationType getTableOperationType() {
+      return TableOperationType.EDIT;
+    }
+
+    @Override
+    protected void setProcId(long procId) {
+      // this is a hack to make this procedure be loaded before the procedure above, as the
+      // procedures are sorted by id when loading.
+      super.setProcId(1L);
+    }
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws IOException {
+    UTIL.cleanupTestDir();
+  }
+
+  private WALProcedureStore procStore;
+
+  private ProcedureExecutor<TestEnv> procExec;
+
+  @Rule
+  public final TestName name = new TestName();
+
+  @Before
+  public void setUp() throws IOException {
+    UTIL.getConfiguration().setInt("hbase.procedure.worker.stuck.threshold.msec", 6000000);
+    procStore = ProcedureTestingUtility.createWalStore(UTIL.getConfiguration(),
+      UTIL.getDataTestDir(name.getMethodName()));
+    procStore.start(2);
+    MasterProcedureScheduler scheduler = new MasterProcedureScheduler();
+    procExec = new ProcedureExecutor<>(UTIL.getConfiguration(), new TestEnv(scheduler), procStore,
+      scheduler);
+    procExec.init(2, false);
+  }
+
+  @After
+  public void tearDown() {
+    procExec.stop();
+    procStore.stop(false);
+  }
+
+  @Test
+  public void testTableProcedure() throws Exception {
+    // let the shared procedure run first, but let it have a greater procId so that when loading
+    // it will be loaded last.
+    long procId1 = procExec.submitProcedure(new TableSharedProcedure());
+    long procId2 = procExec.submitProcedure(new TableExclusiveProcedure());
+    IdLock procExecutionLock = procExec.getProcExecutionLock();
+    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId2);
+    try {
+      procExec.startWorkers();
+      UTIL.waitFor(10000, () -> ((TableSharedProcedure) procExec.getProcedure(procId1)).hasLock());
+    } finally {
+      // release the entry even if the wait above fails, so that it is never leaked.
+      procExecutionLock.releaseLockEntry(lockEntry);
+    }
+
+    ProcedureTestingUtility.restart(procExec);
+
+    ((TableSharedProcedure) procExec.getProcedure(procId1)).latch.countDown();
+    ((TableExclusiveProcedure) procExec.getProcedure(procId2)).latch.countDown();
+
+    UTIL.waitFor(60000, () -> procExec.isFinished(procId1));
+    UTIL.waitFor(60000, () -> procExec.isFinished(procId2));
+  }
+}
-- 
2.17.1