From 45cb9a4dbf1e48e34ebc7ebe220af55ae85c1138 Mon Sep 17 00:00:00 2001 Date: Thu, 26 Sep 2019 17:52:39 +0800 Subject: [PATCH] HBASE-22780 Assign failure for Missing table description file may make region STUCK in rit forever --- .../hadoop/hbase/regionserver/RSRpcServices.java | 3 + ...ssignSTUCKForeverForMissingTableDescriptor.java | 80 ++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignSTUCKForeverForMissingTableDescriptor.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index aa54876..5225b08 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -2018,6 +2018,9 @@ public class RSRpcServices implements HBaseRPCErrorHandler, htds.put(region.getTable(), htd); } if (htd == null) { + if(regionServer.regionsInTransitionInRS.containsKey(encodedNameBytes)){ + regionServer.regionsInTransitionInRS.remove(encodedNameBytes); + } throw new IOException("Missing table descriptor for " + region.getEncodedName()); } // If there is no action in progress, we can submit a specific handler. 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignSTUCKForeverForMissingTableDescriptor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignSTUCKForeverForMissingTableDescriptor.java
new file mode 100644
index 0000000..a228ec1
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignSTUCKForeverForMissingTableDescriptor.java
@@ -0,0 +1,80 @@
+package org.apache.hadoop.hbase.master.assignment;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration test for the condition described in HBASE-22780: an assign that fails because the
+ * table descriptor is missing must not leave the region stuck in transition forever.
+ */
+@Category({MasterTests.class, MediumTests.class})
+public class TestAssignSTUCKForeverForMissingTableDescriptor {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestAssignSTUCKForeverForMissingTableDescriptor.class);
+
+  @Rule
+  public TestName name = new TestName();
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestAssignSTUCKForeverForMissingTableDescriptor.class);
+
+  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  @AfterClass
+  public static void cleanupTest() throws Exception {
+    try {
+      UTIL.shutdownMiniCluster();
+    } catch (Exception e) {
+      LOG.warn("failure shutting down cluster", e);
+    }
+  }
+
+  @Test
+  public void testSTUCK() throws Exception {
+    UTIL.startMiniCluster(2);
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
+    UTIL.createTable(tableName, Bytes.toBytes("f"));
+    final HRegion region = UTIL.getHBaseCluster().getRegions(tableName).get(0);
+    final RegionInfo regionInfo = region.getRegionInfo();
+    LOG.info("Starting tests for condition detected. Our target region: {}",
+        regionInfo.getEncodedName());
+    UTIL.unassignRegion(regionInfo.getRegionName());
+    // Hide the table descriptor; a failed rename must fail the test, not pass it vacuously.
+    Path namespaceDir = new Path(new Path(UTIL.getDefaultRootDirPath(), "data"), "default");
+    Path tableDir = new Path(namespaceDir, tableName.getNameAsString());
+    Path tabledesc = new Path(tableDir, ".tabledesc");
+    Path bak = new Path(tableDir, "bak-tabledesc");
+    assertTrue("could not hide " + tabledesc, UTIL.getTestFileSystem().rename(tabledesc, bak));
+    master.getMasterProcedureExecutor()
+        .submitProcedure(master.getAssignmentManager().createAssignProcedure(regionInfo));
+    // Give the assign time to fail at least once before the descriptor is put back.
+    Thread.sleep(6000);
+    assertTrue("could not restore " + tabledesc, UTIL.getTestFileSystem().rename(bak, tabledesc));
+    // Poll for up to 20s rather than always sleeping 20s; the assertion reports the final state.
+    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
+    UTIL.waitFor(20000,
+        () -> regionStates.getRegionState(regionInfo).getState() == RegionState.State.OPEN);
+    assertEquals(RegionState.State.OPEN, regionStates.getRegionState(regionInfo).getState());
+  }
+}
-- 
1.8.3.1