diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
index a15a2f9..9f2a4e4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java
@@ -585,10 +585,9 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler {
     Arrays.sort(regionInfo, RegionInfo.COMPARATOR);
     schedLock();
     try {
-      // If there is parent procedure, it would have already taken xlock, so no need to take
-      // shared lock here. Otherwise, take shared lock.
-      if (!procedure.hasParent()
-          && waitTableQueueSharedLock(procedure, table) == null) {
+      // HBASE-20846: acquire the shared lock every time, in case the master restarted
+      // and the child procedure was executed before its parent.
+      if (waitTableQueueSharedLock(procedure, table) == null) {
         return true;
       }
 
@@ -614,7 +613,7 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler {
         }
       }
 
-      if (!hasLock && !procedure.hasParent()) {
+      if (!hasLock) {
         wakeTableSharedLock(procedure, table);
       }
       return !hasLock;
@@ -664,11 +663,9 @@ public class MasterProcedureScheduler extends AbstractProcedureScheduler {
         wakeProcedure(nextProcs[i]);
       }
       wakePollIfNeeded(numProcs);
-      if (!procedure.hasParent()) {
-        // release the table shared-lock.
-        // (if we have a parent, it is holding an xlock so we didn't take the shared-lock)
-        wakeTableSharedLock(procedure, table);
-      }
+      // HBASE-20846: since we acquire the shared lock every time,
+      // release the table shared-lock here.
+      wakeTableSharedLock(procedure, table);
     } finally {
       schedUnlock();
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSharedTableLockAfterRestartMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSharedTableLockAfterRestartMaster.java
new file mode 100644
index 0000000..bdfaddd
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestSharedTableLockAfterRestartMaster.java
@@ -0,0 +1,291 @@
+package org.apache.hadoop.hbase.master;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
+import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+/**
+ * Testcase for HBASE-20846: a table procedure blocked behind a region procedure must stay
+ * blocked across a master restart and complete once the region procedure finishes.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestSharedTableLockAfterRestartMaster {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestSharedTableLockAfterRestartMaster.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static final TableName TABLE_NAME = TableName.valueOf("test");
+
+  private static final byte[] CF = Bytes.toBytes("cf");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    // UTIL.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
+    UTIL.startMiniCluster(3);
+    UTIL.getAdmin().balancerSwitch(false, true);
+    UTIL.createTable(TABLE_NAME, CF);
+    UTIL.waitTableAvailable(TABLE_NAME);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  private RegionInfo getTableRegionInfo() {
+    // find the region server hosting the table and return the info of its first region
+    JVMClusterUtil.RegionServerThread rsThread = null;
+    for (JVMClusterUtil.RegionServerThread t : UTIL.getMiniHBaseCluster()
+        .getRegionServerThreads()) {
+      if (!t.getRegionServer().getRegions(TABLE_NAME).isEmpty()) {
+        rsThread = t;
+        break;
+      }
+    }
+    HRegionServer rs = rsThread.getRegionServer();
+    RegionInfo hri = rs.getRegions(TABLE_NAME).get(0).getRegionInfo();
+    return hri;
+  }
+
+  private Procedure<MasterProcedureEnv> getProcedure(
+      ProcedureExecutor<MasterProcedureEnv> executor, final Class<?> procedureClass) {
+    List<Procedure<MasterProcedureEnv>> procedureList = executor.getProcedures().stream()
+        .filter(p -> procedureClass.isInstance(p))
+        .collect(Collectors.toList());
+    return procedureList.get(0);
+  }
+
+  @Test
+  public void test() throws Exception {
+    RegionInfo regionInfo = getTableRegionInfo();
+    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+    final ProcedureExecutor<MasterProcedureEnv> executor = master
+        .getMasterProcedureExecutor();
+    DummyRegionParentProcedure dummyRegionParentProcedure = new DummyRegionParentProcedure(
+        executor.getEnvironment(), regionInfo);
+    long regionProcID = executor.submitProcedure(dummyRegionParentProcedure);
+    LOG.error("Region proc id: " + regionProcID);
+    dummyRegionParentProcedure.waitUntilArrive();
+    Thread.sleep(500);
+    DummyRegionProcedure dummyRegionProcedure = (DummyRegionProcedure) getProcedure(
+        executor, DummyRegionProcedure.class);
+    dummyRegionProcedure.waitUntilArrive();
+    DummyTableProcedure dummyTableProcedure = new DummyTableProcedure(
+        executor.getEnvironment(), TABLE_NAME);
+    long tableProcID = executor.submitProcedure(dummyTableProcedure);
+    LOG.error("Table proc id: " + tableProcID);
+    // wait until the DummyTableProcedure has been submitted to the executor
+    UTIL.waitFor(30000, () -> executor.getProcedures().stream()
+        .filter(p -> p instanceof DummyTableProcedure)
+        .map(p -> (DummyTableProcedure) p)
+        .anyMatch(p -> p.getTableName().equals(TABLE_NAME)));
+    // sleep a while to make sure the request has been dispatched
+    Thread.sleep(500);
+    Assert.assertFalse("DummyTableProcedure should not be finished",
+        dummyTableProcedure.isFinished());
+    // restart master
+    LOG.error("begin to restart master");
+    UTIL.getMiniHBaseCluster().getMaster().abort("test");
+    UTIL.getMiniHBaseCluster().startMaster();
+    while (UTIL.getMiniHBaseCluster().getMaster() == null || !UTIL
+        .getMiniHBaseCluster().getMaster().isInitialized()) {
+      Thread.sleep(500);
+    }
+    LOG.error("Master restart finished");
restart finished"); + master = UTIL.getMiniHBaseCluster().getMaster(); + final ProcedureExecutor executor2 = master + .getMasterProcedureExecutor(); + //wait master restored procedure + UTIL.waitFor(30000, () -> executor2.getProcedures().stream() + .filter(p -> p instanceof DummyTableProcedue) + .map(p -> (DummyTableProcedue) p) + .anyMatch(p -> p.getTableName().equals(TABLE_NAME))); + + + DummyTableProcedue restoredDummyTableProcedure = (DummyTableProcedue) getProcedure( + executor2, DummyTableProcedue.class); + DummyRegionProcedure restoredDummyRegionProcedure = (DummyRegionProcedure) getProcedure( + executor2, DummyRegionProcedure.class); + Thread.sleep(500); + LOG.error("Start to check dummyTableProcedue"); + Assert.assertTrue("DummyTableProcedure should not finished " + + "since dummyRegionProcedure " + + "is still stuck there", + !executor2.isFinished(tableProcID)); + LOG.error("Start to resume DummyRegionProcedure"); + restoredDummyRegionProcedure.resume(); + UTIL.waitFor(30000, () -> executor2.isFinished(tableProcID)); + } + + public enum DummyTableProcedureState { + STATE + } + + + public enum DummyRegionParentState { + STATE + } + + public static class DummyRegionParentProcedure + extends AbstractStateMachineRegionProcedure { + + private static AtomicBoolean dummyRegionProcedureSpawned = new AtomicBoolean(false); + + private final CountDownLatch arrive = new CountDownLatch(1); + + public DummyRegionParentProcedure() { + } + + public DummyRegionParentProcedure(MasterProcedureEnv env, RegionInfo hri) { + super(env, hri); + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.REGION_EDIT; + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, DummyRegionParentState state) + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + if(dummyRegionProcedureSpawned.compareAndSet(false, true)) { + addChildProcedure(new DummyRegionProcedure(env, getRegion())); + arrive.countDown(); + return Flow.HAS_MORE_STATE; + } else { + return Flow.NO_MORE_STATE; + } + } + + @Override + protected void rollbackState(MasterProcedureEnv env, DummyRegionParentState state) + throws IOException, InterruptedException { + } + + @Override + protected DummyRegionParentState getState(int stateId) { + return DummyRegionParentState.STATE; + } + + @Override + protected int getStateId(DummyRegionParentState state) { + return 0; + } + + @Override + protected DummyRegionParentState getInitialState() { + return DummyRegionParentState.STATE; + } + + public void waitUntilArrive() throws InterruptedException { + arrive.await(); + } + + public void resume() { + + } + } + + + public static class DummyTableProcedue extends + AbstractStateMachineTableProcedure { + private TableName tableName; + + private boolean finished = false; + + public DummyTableProcedue() { + super(); + } + + public DummyTableProcedue(final MasterProcedureEnv env, TableName tableName) { + super(env); + this.tableName = tableName; + } + + @Override + public TableName getTableName() { + return tableName; + } + + @Override + public TableOperationType getTableOperationType() { + return TableOperationType.EDIT; + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, + DummyTableProcedureState dummyTableProcedureState) + throws ProcedureSuspendedException, ProcedureYieldException, + InterruptedException { + return Flow.NO_MORE_STATE; + } + + + @Override + protected void rollbackState(MasterProcedureEnv env, + DummyTableProcedureState 
+        DummyTableProcedureState dummyTableProcedureState)
+        throws IOException, InterruptedException {
+    }
+
+    @Override
+    protected DummyTableProcedureState getState(int stateId) {
+      return DummyTableProcedureState.STATE;
+    }
+
+    @Override
+    protected int getStateId(DummyTableProcedureState dummyTableProcedureState) {
+      return 0;
+    }
+
+    @Override
+    protected DummyTableProcedureState getInitialState() {
+      return DummyTableProcedureState.STATE;
+    }
+
+    @Override
+    protected void serializeStateData(ProcedureStateSerializer serializer)
+        throws IOException {
+      super.serializeStateData(serializer);
+      serializer.serialize(ProtobufUtil.toProtoTableName(getTableName()));
+    }
+
+    @Override
+    protected void deserializeStateData(ProcedureStateSerializer serializer)
+        throws IOException {
+      super.deserializeStateData(serializer);
+      this.tableName = ProtobufUtil
+          .toTableName(serializer.deserialize(HBaseProtos.TableName.class));
+    }
+  }
+}