From a1c8318a993c4273bbc8610d9202ea8c0a4b247c Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Wed, 6 Jun 2018 14:17:53 -0700
Subject: [PATCH] Test for HBASE-20671

Add TestMergeRegionsWithMasterCrash. It merges two regions, kills the active
Master, waits until a different Master is reported, then re-enables the
catalog janitor and polls MetaTableAccessor.getRegionsFromMergeQualifier
until it no longer returns both merge parents for the merged region.

Make the helpers in TestMergeTableRegionsProcedure static and have them take
the HBaseTestingUtility as a parameter so the new test can reuse them.

Have MetaTableAccessor.debugLogMutation pass a RuntimeException along with
each logged mutation so the call site shows up in the log. The logging is
skipped entirely when DEBUG is disabled.
---
Note: the blob ids on the "index" lines below were not recomputed after the
review edits to this patch.

 .../org/apache/hadoop/hbase/MetaTableAccessor.java |   6 +-
 .../TestMergeRegionsWithMasterCrash.java           | 208 +++++++++++++++++++++
 .../assignment/TestMergeTableRegionsProcedure.java |  67 ++++---
 3 files changed, 245 insertions(+), 36 deletions(-)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeRegionsWithMasterCrash.java

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 1880a0d7c0..a81f46fa2e 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -1928,7 +1928,11 @@ public class MetaTableAccessor {
   }
 
   private static void debugLogMutation(Mutation p) throws IOException {
-    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
+    // Skip building the JSON and the exception when DEBUG is off. The trailing Throwable has
+    // no matching "{}", so slf4j logs it as the exception and prints the caller's stack trace.
+    if (METALOG.isDebugEnabled()) {
+      METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON(), new RuntimeException());
+    }
   }
 
   private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeRegionsWithMasterCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeRegionsWithMasterCrash.java
new file mode 100644
index 0000000000..184f4ab70d
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeRegionsWithMasterCrash.java
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Threads;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * To test HBASE-20671 "Merged region brought back to life causing RS to be killed by Master".
+ * Bulk copied from adjacent {@link TestMergeTableRegionsProcedure}. Uses some of its methods.
+ */
+@Category({MasterTests.class, MediumTests.class})
+public class TestMergeRegionsWithMasterCrash {
+  private static final Logger LOG = LoggerFactory.getLogger(TestMergeRegionsWithMasterCrash.class);
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMergeRegionsWithMasterCrash.class);
+
+  @Rule public final TestName name = new TestName();
+
+  protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private static final int initialRegionCount = 4;
+  private static final byte[] FAMILY = Bytes.toBytes("FAMILY");
+  private static final Configuration CONF = UTIL.getConfiguration();
+  private static Admin ADMIN;
+
+  private AssignmentManager am;
+  private ProcedureMetrics mergeProcMetrics;
+  private ProcedureMetrics assignProcMetrics;
+  private ProcedureMetrics unassignProcMetrics;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    TestMergeTableRegionsProcedure.setupConf(CONF);
+    UTIL.startMiniCluster(2, 1);
+    ADMIN = UTIL.getAdmin();
+  }
+
+  @AfterClass
+  public static void cleanupTest() throws Exception {
+    try {
+      UTIL.shutdownMiniCluster();
+    } catch (Exception e) {
+      LOG.warn("failure shutting down cluster", e);
+    }
+  }
+
+  @Before
+  public void setup() throws Exception {
+    TestMergeTableRegionsProcedure.resetProcExecutorTestingKillFlag(UTIL);
+    MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster());
+    MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster());
+    // Turn off balancer so it doesn't cut in and mess up our placements.
+    ADMIN.balancerSwitch(false, true);
+    // Turn off the meta scanner so it doesn't remove parent on us.
+    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
+    TestMergeTableRegionsProcedure.resetProcExecutorTestingKillFlag(UTIL);
+    am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
+    mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
+    assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
+    unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    TestMergeTableRegionsProcedure.resetProcExecutorTestingKillFlag(UTIL);
+    for (TableDescriptor htd: ADMIN.listTableDescriptors()) {
+      LOG.info("Tear down, remove table=" + htd.getTableName());
+      UTIL.deleteTable(htd.getTableName());
+    }
+  }
+
+  /**
+   * Same as {@link TestMergeTableRegionsProcedure#testMergeTwoRegions()}, then kills the Master
+   * (two are running, to repro HBASE-20671), waits for the other Master, re-enables the catalog
+   * janitor and polls until the merged region no longer lists both merge parents.
+   * @throws Exception if a cluster, procedure or hbase:meta operation fails
+   */
+  @Test
+  public void test() throws Exception {
+    final TableName tableName = TableName.valueOf(this.name.getMethodName());
+    final ProcedureExecutor<MasterProcedureEnv> procExec =
+        TestMergeTableRegionsProcedure.getMasterProcedureExecutor(UTIL);
+
+    List<RegionInfo> tableRegions =
+        TestMergeTableRegionsProcedure.createTable(UTIL, tableName, initialRegionCount);
+
+    RegionInfo[] regionsToMerge = new RegionInfo[2];
+    regionsToMerge[0] = tableRegions.get(0);
+    regionsToMerge[1] = tableRegions.get(1);
+
+    // collect AM metrics before test
+    long mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
+    long mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
+    long assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
+    long assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
+    long unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
+    long unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
+
+    MergeTableRegionsProcedure proc =
+        new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
+    long procId = procExec.submitProcedure(proc);
+    ProcedureTestingUtility.waitProcedure(procExec, procId);
+    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+    TestMergeTableRegionsProcedure.assertRegionCount(UTIL, tableName,
+        initialRegionCount - 1);
+
+    assertEquals(mergeSubmittedCount + 1,
+        mergeProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(mergeFailedCount,
+        mergeProcMetrics.getFailedCounter().getCount());
+    assertEquals(assignSubmittedCount + 1,
+        assignProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(assignFailedCount,
+        assignProcMetrics.getFailedCounter().getCount());
+    assertEquals(unassignSubmittedCount + 2,
+        unassignProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(unassignFailedCount,
+        unassignProcMetrics.getFailedCounter().getCount());
+
+    Pair<RegionInfo, RegionInfo> pair =
+        MetaTableAccessor.getRegionsFromMergeQualifier(UTIL.getConnection(),
+            proc.getMergedRegion().getRegionName());
+    assertTrue(pair.getFirst() != null && pair.getSecond() != null);
+
+    // TODO(HBASE-20671): put one of the merge parents back into hbase:meta here. The original
+    // statement was left unfinished ("MetaTableAccessor."), so the intended call is unknown.
+
+    // Now kill the Master and let backup take over to repro HBASE-20671.
+    ServerName masterServerName = UTIL.getHBaseCluster().getMaster().getServerName();
+    UTIL.getHBaseCluster().killMaster(masterServerName);
+    while (true) {
+      HMaster master = UTIL.getHBaseCluster().getMaster();
+      if (master != null) {
+        ServerName sn = master.getServerName();
+        // If new Master and not same as old Master, proceed.
+        if (sn != null && !sn.equals(masterServerName)) {
+          break;
+        }
+      }
+      Threads.sleep(100);
+    }
+    // Ok, new Master is up. Need to hang out to see if the merged parents get reassigned.
+    Threads.sleep(10000);
+
+    // Can I purge the merged regions from hbase:meta? Check that all went
+    // well by looking at the merged row up in hbase:meta. It should have no
+    // more mention of the merged regions; they are purged as last step in
+    // the merged regions cleanup.
+    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
+    UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow();
+    while (pair != null && pair.getFirst() != null && pair.getSecond() != null) {
+      Threads.sleep(100); // Pause between polls instead of spinning on hbase:meta.
+      pair = MetaTableAccessor.getRegionsFromMergeQualifier(UTIL.getConnection(),
+          proc.getMergedRegion().getRegionName());
+    }
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
index 094a5a0cf1..60a564d1ed 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java
@@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.TableName;
@@ -64,8 +63,6 @@ public class TestMergeTableRegionsProcedure {
   @Rule public final TestName name = new TestName();
 
   protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-  private static long nonceGroup = HConstants.NO_NONCE;
-  private static long nonce = HConstants.NO_NONCE;
 
   private static final int initialRegionCount = 4;
   private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
@@ -83,7 +80,7 @@ public class TestMergeTableRegionsProcedure {
   private long unassignSubmittedCount = 0;
   private long unassignFailedCount = 0;
 
-  private static void setupConf(Configuration conf) {
+  static void setupConf(Configuration conf) {
     // Reduce the maximum attempts to speed up the test
     conf.setInt("hbase.assignment.maximum.attempts", 3);
conf.setInt("hbase.master.maximum.ping.server.attempts", 3); @@ -109,15 +106,14 @@ public class TestMergeTableRegionsProcedure { @Before public void setup() throws Exception { - resetProcExecutorTestingKillFlag(); - nonceGroup = - MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster()); - nonce = MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster()); + resetProcExecutorTestingKillFlag(UTIL); + MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster()); + MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster()); // Turn off balancer so it doesn't cut in and mess up our placements. UTIL.getHBaseAdmin().setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); - resetProcExecutorTestingKillFlag(); + resetProcExecutorTestingKillFlag(UTIL); am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics(); assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); @@ -126,15 +122,15 @@ public class TestMergeTableRegionsProcedure { @After public void tearDown() throws Exception { - resetProcExecutorTestingKillFlag(); + resetProcExecutorTestingKillFlag(UTIL); for (HTableDescriptor htd: UTIL.getHBaseAdmin().listTables()) { LOG.info("Tear down, remove table=" + htd.getTableName()); UTIL.deleteTable(htd.getTableName()); } } - private void resetProcExecutorTestingKillFlag() { - final ProcedureExecutor procExec = getMasterProcedureExecutor(); + static void resetProcExecutorTestingKillFlag(HBaseTestingUtility htu) { + final ProcedureExecutor procExec = getMasterProcedureExecutor(htu); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); assertTrue("expected executor to be running", procExec.isRunning()); } @@ -145,9 +141,9 @@ public class 
TestMergeTableRegionsProcedure { @Test public void testMergeTwoRegions() throws Exception { final TableName tableName = TableName.valueOf(this.name.getMethodName()); - final ProcedureExecutor procExec = getMasterProcedureExecutor(); + final ProcedureExecutor procExec = getMasterProcedureExecutor(UTIL); - List tableRegions = createTable(tableName); + List tableRegions = createTable(UTIL, tableName, initialRegionCount); RegionInfo[] regionsToMerge = new RegionInfo[2]; regionsToMerge[0] = tableRegions.get(0); @@ -161,7 +157,7 @@ public class TestMergeTableRegionsProcedure { long procId = procExec.submitProcedure(proc); ProcedureTestingUtility.waitProcedure(procExec, procId); ProcedureTestingUtility.assertProcNotFailed(procExec, procId); - assertRegionCount(tableName, initialRegionCount - 1); + assertRegionCount(UTIL, tableName, initialRegionCount - 1); assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount()); assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); @@ -193,9 +189,9 @@ public class TestMergeTableRegionsProcedure { @Test public void testMergeRegionsConcurrently() throws Exception { final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently"); - final ProcedureExecutor procExec = getMasterProcedureExecutor(); + final ProcedureExecutor procExec = getMasterProcedureExecutor(UTIL); - List tableRegions = createTable(tableName); + List tableRegions = createTable(UTIL, tableName, initialRegionCount); RegionInfo[] regionsToMerge1 = new RegionInfo[2]; RegionInfo[] regionsToMerge2 = new RegionInfo[2]; @@ -215,7 +211,7 @@ public class TestMergeTableRegionsProcedure { ProcedureTestingUtility.waitProcedure(procExec, procId2); ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); - assertRegionCount(tableName, initialRegionCount - 2); + assertRegionCount(UTIL, tableName, initialRegionCount - 2); 
assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount()); assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); @@ -228,9 +224,9 @@ public class TestMergeTableRegionsProcedure { @Test public void testRecoveryAndDoubleExecution() throws Exception { final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution"); - final ProcedureExecutor procExec = getMasterProcedureExecutor(); + final ProcedureExecutor procExec = getMasterProcedureExecutor(UTIL); - List tableRegions = createTable(tableName); + List tableRegions = createTable(UTIL, tableName, initialRegionCount); ProcedureTestingUtility.waitNoProcedureRunning(procExec); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); @@ -246,15 +242,15 @@ public class TestMergeTableRegionsProcedure { MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); ProcedureTestingUtility.assertProcNotFailed(procExec, procId); - assertRegionCount(tableName, initialRegionCount - 1); + assertRegionCount(UTIL, tableName, initialRegionCount - 1); } @Test public void testRollbackAndDoubleExecution() throws Exception { final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution"); - final ProcedureExecutor procExec = getMasterProcedureExecutor(); + final ProcedureExecutor procExec = getMasterProcedureExecutor(UTIL); - List tableRegions = createTable(tableName); + List tableRegions = createTable(UTIL, tableName, initialRegionCount); ProcedureTestingUtility.waitNoProcedureRunning(procExec); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); @@ -277,9 +273,9 @@ public class TestMergeTableRegionsProcedure { @Test public void testMergeWithoutPONR() throws Exception { final TableName tableName = TableName.valueOf("testMergeWithoutPONR"); - final ProcedureExecutor procExec = getMasterProcedureExecutor(); + final ProcedureExecutor procExec = getMasterProcedureExecutor(UTIL); - List 
tableRegions = createTable(tableName); + List tableRegions = createTable(UTIL, tableName, initialRegionCount); ProcedureTestingUtility.waitNoProcedureRunning(procExec); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); @@ -300,10 +296,11 @@ public class TestMergeTableRegionsProcedure { MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec); ProcedureTestingUtility.waitProcedure(procExec, procId); - assertRegionCount(tableName, initialRegionCount - 1); + assertRegionCount(UTIL, tableName, initialRegionCount - 1); } - private List createTable(final TableName tableName) + static List createTable(HBaseTestingUtility htu, final TableName tableName, + int initialRegionCount) throws Exception { HTableDescriptor desc = new HTableDescriptor(tableName); desc.addFamily(new HColumnDescriptor(FAMILY)); @@ -311,20 +308,20 @@ public class TestMergeTableRegionsProcedure { for (int i = 0; i < splitRows.length; ++i) { splitRows[i] = Bytes.toBytes(String.format("%d", i)); } - admin.createTable(desc, splitRows); - return assertRegionCount(tableName, initialRegionCount); + htu.getAdmin().createTable(desc, splitRows); + return assertRegionCount(htu, tableName, initialRegionCount); } - public List assertRegionCount(final TableName tableName, final int nregions) - throws Exception { - UTIL.waitUntilNoRegionsInTransition(); - List tableRegions = admin.getRegions(tableName); + static List assertRegionCount(HBaseTestingUtility htu, final TableName tableName, + final int nregions) throws Exception { + htu.waitUntilNoRegionsInTransition(); + List tableRegions = htu.getAdmin().getRegions(tableName); assertEquals(nregions, tableRegions.size()); return tableRegions; } - private ProcedureExecutor getMasterProcedureExecutor() { - return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); + static ProcedureExecutor getMasterProcedureExecutor(HBaseTestingUtility htu) { + return htu.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } 
private void collectAssignmentManagerMetrics() { -- 2.16.3