From 4f909298df3bdb0ce1813805acbdcc385a7079fa Mon Sep 17 00:00:00 2001
From: Esteban Gutierrez
Date: Fri, 21 Jul 2017 13:13:00 -0500
Subject: [PATCH] HBASE-18024 HRegion#initializeRegionInternals should not
 re-create .hregioninfo file when the region directory no longer exists

---
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 11 ++++-
 .../hbase/regionserver/HRegionFileSystem.java      | 28 ++++++++++--
 .../hadoop/hbase/regionserver/TestHRegion.java     |  7 ++-
 .../hadoop/hbase/regionserver/TestRegionOpen.java  | 53 +++++++++++++++++++++-
 .../regionserver/TestStoreFileRefresherChore.java  |  1 +
 .../TestWALMonotonicallyIncreasingSeqId.java       |  5 +-
 6 files changed, 94 insertions(+), 11 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 31357adac4..4fa9e4f17d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -888,8 +888,15 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
     }

     // Write HRI to a file in case we need to recover hbase:meta
-    status.setStatus("Writing region info on filesystem");
-    fs.checkRegionInfoOnFilesystem();
+    // Only the primary replica should write .regioninfo
+    if (this.getRegionInfo().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+      status.setStatus("Writing region info on filesystem");
+      fs.checkRegionInfoOnFilesystem();
+    } else {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Skipping creation of .regioninfo file for " + this.getRegionInfo());
+      }
+    }

     // Initialize all the HStores
     status.setStatus("Initializing all the Stores");
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index 10412605f4..8febe6ed68 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -893,9 +893,19 @@ public class HRegionFileSystem {
     // only should be sufficient. I don't want to read the file every time to check if it pb
     // serialized.
     byte[] content = getRegionInfoFileContent(regionInfoForFs);
+
+    // Verify if the region directory exists before opening a region. We need to do this since if
+    // the region directory doesn't exist we will re-create the region directory and a new HRI
+    // when HRegion.openHRegion() is called.
     try {
-      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
+      FileStatus status = fs.getFileStatus(getRegionDir());
+    } catch (FileNotFoundException e) {
+      LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
+        " on table " + regionInfo.getTable());
+    }
+
+    try {
+      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
       FileStatus status = fs.getFileStatus(regionInfoFile);
       if (status != null && status.getLen() == content.length) {
         // Then assume the content good and move on.
@@ -988,7 +998,10 @@ public class HRegionFileSystem {
     }

     // Write HRI to a file in case we need to recover hbase:meta
-    regionFs.writeRegionInfoOnFilesystem(false);
+    // Only primary replicas should write region info
+    if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+      regionFs.writeRegionInfoOnFilesystem(false);
+    }
     return regionFs;
   }

@@ -1018,8 +1031,15 @@ public class HRegionFileSystem {
       regionFs.cleanupSplitsDir();
       regionFs.cleanupMergesDir();

-      // if it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
-      regionFs.checkRegionInfoOnFilesystem();
+      // If it doesn't exist, write HRI to a file, in case we need to recover hbase:meta
+      // Only create HRI if we are the default replica
+      if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
+        regionFs.checkRegionInfoOnFilesystem();
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
+        }
+      }
     }

     return regionFs;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 2838949330..9db7c160f2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -5899,6 +5899,10 @@ public class TestHRegion {

   @Test
   public void testCloseRegionWrittenToWAL() throws Exception {
+
+    Path rootDir = new Path(dir + name.getMethodName());
+    FSUtils.setRootDir(TEST_UTIL.getConfiguration(), rootDir);
+
     final ServerName serverName = ServerName.valueOf("testCloseRegionWrittenToWAL", 100, 42);
     final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName));

@@ -5916,7 +5920,8 @@ public class TestHRegion {

     when(rss.getWAL((HRegionInfo) any())).thenReturn(wal);

-    // open a region first so that it can be closed later
+    // create and then open a region first so that it can be closed later
+    region = HRegion.createHRegion(hri, rootDir, TEST_UTIL.getConfiguration(), htd, rss.getWAL(hri));
     region = HRegion.openHRegion(hri, htd, rss.getWAL(hri),
       TEST_UTIL.getConfiguration(), rss, null);

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
index f49bdb1145..66c3075ad6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionOpen.java
@@ -19,26 +19,39 @@ package org.apache.hadoop.hbase.regionserver;

 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
 import java.util.concurrent.ThreadPoolExecutor;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.executor.ExecutorType;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;

 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import static org.junit.Assert.fail;

 @Category({MediumTests.class, RegionServerTests.class})
 public class TestRegionOpen {
@@ -47,7 +60,9 @@ public class TestRegionOpen {
   private static final int NB_SERVERS = 1;

   private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
-  final TableName tableName = TableName.valueOf(TestRegionOpen.class.getSimpleName());
+
+  @Rule
+  public TestName name = new TestName();

   @BeforeClass
   public static void before() throws Exception {
@@ -65,6 +80,7 @@ public class TestRegionOpen {

   @Test(timeout = 60000)
   public void testPriorityRegionIsOpenedWithSeparateThreadPool() throws Exception {
+    final TableName tableName = TableName.valueOf(TestRegionOpen.class.getSimpleName());
     ThreadPoolExecutor exec = getRS().getExecutorService()
         .getExecutorThreadPool(ExecutorType.RS_OPEN_PRIORITY_REGION);

@@ -80,4 +96,39 @@ public class TestRegionOpen {

     assertEquals(1, exec.getCompletedTaskCount());
   }
+
+  @Test(timeout = 60000)
+  public void testNonExistingRegionReplica() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    final byte[] FAMILYNAME = Bytes.toBytes("fam");
+    FileSystem fs = HTU.getTestFileSystem();
+    Admin admin = HTU.getAdmin();
+    Configuration conf = HTU.getConfiguration();
+    Path rootDir = HTU.getDataTestDirOnTestFS();
+
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    htd.addFamily(new HColumnDescriptor(FAMILYNAME));
+    admin.createTable(htd);
+    HTU.waitUntilNoRegionsInTransition(60000);
+
+    // Create new HRI with non-default region replica id
+    HRegionInfo hri = new HRegionInfo(htd.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("B"), false,
+        System.currentTimeMillis(), 1);
+    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
+        FSUtils.getTableDir(rootDir, hri.getTable()), hri);
+    Path regionDir = regionFs.getRegionDir();
+    try {
+      HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
+    } catch (IOException e) {
+      LOG.info("Caught expected IOE due to missing .regioninfo file: " + e.getMessage() + ", skipping region open.");
+      // We should only have 1 region online
+      List<HRegionInfo> regions = admin.getTableRegions(tableName);
+      LOG.info("Regions: " + regions);
+      if (regions.size() != 1) {
+        fail("Table " + tableName + " should have only one region, but got more: " + regions);
+      }
+      return;
+    }
+    fail("Should have thrown IOE when attempting to open a non-existing region.");
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
index 99dd00db99..5ac7efb4eb 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java
@@ -176,6 +176,7 @@ public class TestStoreFileRefresherChore {
     when(regionServer.getConfiguration()).thenReturn(TEST_UTIL.getConfiguration());

     HTableDescriptor htd = getTableDesc(TableName.valueOf(name.getMethodName()), families);
+    htd.setRegionReplication(2);
     Region primary = initHRegion(htd, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, 0);
     Region replica1 = initHRegion(htd, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, 1);
     regions.add(primary);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALMonotonicallyIncreasingSeqId.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALMonotonicallyIncreasingSeqId.java
index e63bad9124..6294541ce4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALMonotonicallyIncreasingSeqId.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALMonotonicallyIncreasingSeqId.java
@@ -99,9 +99,8 @@ public class TestWALMonotonicallyIncreasingSeqId {
     this.walConf = walConf;
     wals = new WALFactory(walConf, null, "log_" + replicaId);
     ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null);
-    HRegion region = new HRegion(fs, wals.getWAL(info.getEncodedNameAsBytes(),
-        info.getTable().getNamespace()), conf, htd, null);
-    region.initialize();
+    HRegion region = HRegion.createHRegion(info, TEST_UTIL.getDefaultRootDirPath(), conf, htd,
+        wals.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace()));
     return region;
   }

-- 
2.13.3
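
Note (illustrative only, not part of the patch above): every hunk applies the same guard — the .regioninfo file is written only when the region's replica id equals HRegionInfo.DEFAULT_REPLICA_ID, so a secondary replica opening against an existing region directory never re-creates the directory or a fresh HRegionInfo. The standalone sketch below isolates just that guard; the class and method names here are hypothetical, and it only reuses the HRegionInfo constructor and constant already exercised by the patch.

import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Sketch of the replica-id guard introduced by HBASE-18024.
 * Not part of the patch; names are for illustration only.
 */
public class RegionInfoWriteGuardExample {

  // Only the primary replica (replica id == HRegionInfo.DEFAULT_REPLICA_ID, i.e. 0)
  // is allowed to persist the .regioninfo file.
  static boolean shouldWriteRegionInfo(HRegionInfo regionInfo) {
    return regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
  }

  public static void main(String[] args) {
    TableName table = TableName.valueOf("exampleTable");
    long regionId = System.currentTimeMillis();

    // Primary (replica id 0) and secondary (replica id 1) replicas of the same key range.
    HRegionInfo primary =
        new HRegionInfo(table, Bytes.toBytes("A"), Bytes.toBytes("B"), false, regionId, 0);
    HRegionInfo secondary =
        new HRegionInfo(table, Bytes.toBytes("A"), Bytes.toBytes("B"), false, regionId, 1);

    // Prints true for the primary and false for the secondary: only the primary would
    // reach checkRegionInfoOnFilesystem()/writeRegionInfoOnFilesystem() after this change.
    System.out.println("primary writes .regioninfo:   " + shouldWriteRegionInfo(primary));
    System.out.println("secondary writes .regioninfo: " + shouldWriteRegionInfo(secondary));
  }
}

With the HBase client libraries on the classpath, the example prints true for the primary replica and false for the secondary, mirroring the branch taken in HRegion#initializeRegionInternals and HRegionFileSystem#openRegionFromFileSystem once this patch is applied.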