From 0915c92c8b1aab287b666069ad8f43259f6f8824 Mon Sep 17 00:00:00 2001 From: Guanghao Zhang Date: Wed, 27 Feb 2019 17:47:47 +0800 Subject: [PATCH] HBASE-21967 Split TestServerCrashProcedure and TestServerCrashProcedureWithReplicas --- .../hadoop/hbase/master/procedure/TestSCP.java | 213 ++++++++++++++++++ .../master/procedure/TestSCPWithReplicas.java | 84 +++++++ .../TestSCPWithReplicasWithoutZKCoordinated.java | 40 ++++ .../procedure/TestSCPWithoutZKCoordinated.java | 40 ++++ .../master/procedure/TestServerCrashProcedure.java | 244 --------------------- .../TestServerCrashProcedureWithReplicas.java | 84 ------- 6 files changed, 377 insertions(+), 328 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCP.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicas.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicasWithoutZKCoordinated.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithoutZKCoordinated.java delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedureWithReplicas.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCP.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCP.java new file mode 100644 index 0000000..b3fbefc --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCP.java @@ -0,0 +1,213 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; +import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({MasterTests.class, LargeTests.class}) +public class TestSCP { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = 
HBaseClassTestRule.forClass(TestSCP.class); + + private static final Logger LOG = LoggerFactory.getLogger(TestSCP.class); + + protected HBaseTestingUtility util; + + protected void setupConf(Configuration conf) { + conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); + conf.set("hbase.balancer.tablesOnMaster", "none"); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3); + conf.setInt(HConstants.HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER, 3); + conf.setBoolean("hbase.split.writer.creation.bounded", true); + conf.setInt("hbase.regionserver.hlog.splitlog.writer.threads", 8); + conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, true); + } + + @Before + public void setup() throws Exception { + this.util = new HBaseTestingUtility(); + setupConf(this.util.getConfiguration()); + startMiniCluster(); + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( + this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false); + } + + protected void startMiniCluster() throws Exception { + this.util.startMiniCluster(3); + } + + @After + public void tearDown() throws Exception { + MiniHBaseCluster cluster = this.util.getHBaseCluster(); + HMaster master = cluster == null? 
null: cluster.getMaster(); + if (master != null && master.getMasterProcedureExecutor() != null) { + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( + master.getMasterProcedureExecutor(), false); + } + this.util.shutdownMiniCluster(); + } + + + @Test + public void testCrashTargetRs() throws Exception { + testRecoveryAndDoubleExecution(false, false); + } + + @Test + public void testRecoveryAndDoubleExecutionOnRsWithMeta() throws Exception { + testRecoveryAndDoubleExecution(true, true); + } + + @Test + public void testRecoveryAndDoubleExecutionOnRsWithoutMeta() throws Exception { + testRecoveryAndDoubleExecution(false, true); + } + + private long getSCPProcId(ProcedureExecutor<?> procExec) { + util.waitFor(30000, () -> !procExec.getProcedures().isEmpty()); + return procExec.getActiveProcIds().stream().mapToLong(Long::longValue).min().getAsLong(); + } + + /** + * Run server crash procedure steps twice to test idempotency and that we are persisting all + * needed state. + */ + private void testRecoveryAndDoubleExecution(boolean carryingMeta, boolean doubleExecution) + throws Exception { + final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution-carryingMeta-" + + carryingMeta + "-doubleExecution-" + doubleExecution); + try (Table t = createTable(tableName)) { + // Load the table with a bit of data so some logs to split and some edits in each region. + this.util.loadTable(t, HBaseTestingUtility.COLUMNS[0]); + final int count = util.countRows(t); + assertTrue("expected some rows", count > 0); + final String checksum = util.checksumRows(t); + // Run the procedure executor outside the master so we can mess with it. Need to disable + // Master's running of the server crash processing. 
+ final HMaster master = this.util.getHBaseCluster().getMaster(); + final ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor(); + // find the first server that match the request and executes the test + ServerName rsToKill = null; + for (RegionInfo hri : util.getAdmin().getRegions(tableName)) { + final ServerName serverName = AssignmentTestingUtil.getServerHoldingRegion(util, hri); + if (AssignmentTestingUtil.isServerHoldingMeta(util, serverName) == carryingMeta) { + rsToKill = serverName; + break; + } + } + // Enable test flags and then queue the crash procedure. + ProcedureTestingUtility.waitNoProcedureRunning(procExec); + if (doubleExecution) { + // For SCP, if you enable this then we will enter an infinite loop, as we will crash between + // queue and open for TRSP, and then going back to queue, as we will use the crash rs as the + // target server since it is recorded in hbase:meta. + ProcedureTestingUtility.setKillIfHasParent(procExec, false); + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); + // kill the RS + AssignmentTestingUtil.killRs(util, rsToKill); + long procId = getSCPProcId(procExec); + // Now run through the procedure twice crashing the executor on each step... + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); + } else { + // kill the RS + AssignmentTestingUtil.killRs(util, rsToKill); + long procId = getSCPProcId(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } + assertReplicaDistributed(t); + assertEquals(count, util.countRows(t)); + assertEquals(checksum, util.checksumRows(t)); + } + } + + @Test + public void testConcurrentSCPForSameServer() throws Exception { + final TableName tableName = + TableName.valueOf("testConcurrentSCPForSameServer"); + try (Table t = createTable(tableName)) { + // Load the table with a bit of data so some logs to split and some edits in each region. 
+ this.util.loadTable(t, HBaseTestingUtility.COLUMNS[0]); + final int count = util.countRows(t); + assertTrue("expected some rows", count > 0); + // find the first server that match the request and executes the test + ServerName rsToKill = null; + for (RegionInfo hri : util.getAdmin().getRegions(tableName)) { + final ServerName serverName = AssignmentTestingUtil.getServerHoldingRegion(util, hri); + if (AssignmentTestingUtil.isServerHoldingMeta(util, serverName) == true) { + rsToKill = serverName; + break; + } + } + HMaster master = util.getHBaseCluster().getMaster(); + final ProcedureExecutor<MasterProcedureEnv> pExecutor = master.getMasterProcedureExecutor(); + ServerCrashProcedure procB = + new ServerCrashProcedure(pExecutor.getEnvironment(), rsToKill, false, false); + AssignmentTestingUtil.killRs(util, rsToKill); + long procId = getSCPProcId(pExecutor); + Procedure procA = pExecutor.getProcedure(procId); + LOG.info("submit SCP procedureA"); + util.waitFor(5000, () -> procA.hasLock()); + LOG.info("procedureA acquired the lock"); + assertEquals(Procedure.LockState.LOCK_EVENT_WAIT, + procB.acquireLock(pExecutor.getEnvironment())); + LOG.info("procedureB should not be able to get the lock"); + util.waitFor(60000, + () -> procB.acquireLock(pExecutor.getEnvironment()) == Procedure.LockState.LOCK_ACQUIRED); + LOG.info("when procedure B get the lock, procedure A should be finished"); + assertTrue(procA.isFinished()); + } + } + + protected void assertReplicaDistributed(final Table t) { + return; + } + + protected Table createTable(final TableName tableName) throws IOException { + final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS, + HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE); + return t; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicas.java new file mode 100644 index 0000000..522e820 --- /dev/null +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicas.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable + * law or agreed to in writing, software distributed under the License is distributed on an "AS IS" + * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License + * for the specific language governing permissions and limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.regionserver.Region; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ MasterTests.class, LargeTests.class }) +public class TestSCPWithReplicas extends TestSCP { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + 
HBaseClassTestRule.forClass(TestSCPWithReplicas.class); + private static final Logger LOG = LoggerFactory.getLogger(TestSCPWithReplicas.class); + + @Override + protected void startMiniCluster() throws Exception { + // Start a cluster with 4 nodes because we have 3 replicas. + // So on a crash of a server still we can ensure that the + // replicas are distributed. + this.util.startMiniCluster(4); + } + + @Override + protected Table createTable(final TableName tableName) throws IOException { + final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS, + HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE, 3); + return t; + } + + @Override + protected void assertReplicaDistributed(final Table t) { + // Assert all data came back. + List<RegionInfo> regionInfos = new ArrayList<>(); + for (RegionServerThread rs : this.util.getMiniHBaseCluster().getRegionServerThreads()) { + regionInfos.clear(); + for (Region r : rs.getRegionServer().getRegions(t.getName())) { + LOG.info("The region is " + r.getRegionInfo() + " the location is " + + rs.getRegionServer().getServerName()); + if (contains(regionInfos, r.getRegionInfo())) { + LOG.error("Am exiting"); + fail("Crashed replica regions should not be assigned to same region server"); + } else { + regionInfos.add(r.getRegionInfo()); + } + } + } + } + + private boolean contains(List<RegionInfo> regionInfos, RegionInfo regionInfo) { + for (RegionInfo info : regionInfos) { + if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) { + return true; + } + } + return false; + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicasWithoutZKCoordinated.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicasWithoutZKCoordinated.java new file mode 100644 index 0000000..ead6572 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithReplicasWithoutZKCoordinated.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; + +@Category({ MasterTests.class, LargeTests.class }) +public class TestSCPWithReplicasWithoutZKCoordinated extends TestSCPWithReplicas { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSCPWithReplicasWithoutZKCoordinated.class); + + @Override + protected void setupConf(Configuration conf) { + super.setupConf(conf); + conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, false); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithoutZKCoordinated.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithoutZKCoordinated.java new file mode 100644 index 0000000..059ece2 --- /dev/null +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPWithoutZKCoordinated.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; + +@Category({MasterTests.class, LargeTests.class}) +public class TestSCPWithoutZKCoordinated extends TestSCP { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSCPWithoutZKCoordinated.class); + + @Override + protected void setupConf(Configuration conf) { + super.setupConf(conf); + conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, false); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java deleted file mode 100644 index 
6751eaf..0000000 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java +++ /dev/null @@ -1,244 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master.procedure; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseClassTestRule; -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.Table; -import org.apache.hadoop.hbase.master.HMaster; -import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; -import org.apache.hadoop.hbase.procedure2.Procedure; -import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; -import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; -import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 
-import org.apache.hadoop.hbase.testclassification.LargeTests; -import org.apache.hadoop.hbase.testclassification.MasterTests; -import org.junit.After; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@RunWith(Parameterized.class) -@Category({MasterTests.class, LargeTests.class}) -public class TestServerCrashProcedure { - - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestServerCrashProcedure.class); - - private static final Logger LOG = LoggerFactory.getLogger(TestServerCrashProcedure.class); - - protected HBaseTestingUtility util; - - @Parameter - public boolean splitWALCoordinatedByZK; - - private ProcedureMetrics serverCrashProcMetrics; - private long serverCrashSubmittedCount = 0; - private long serverCrashFailedCount = 0; - - private void setupConf(Configuration conf) { - conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); - conf.set("hbase.balancer.tablesOnMaster", "none"); - conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3); - conf.setInt(HConstants.HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER, 3); - conf.setBoolean("hbase.split.writer.creation.bounded", true); - conf.setInt("hbase.regionserver.hlog.splitlog.writer.threads", 8); - LOG.info("WAL splitting coordinated by zk? 
{}", splitWALCoordinatedByZK); - conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, splitWALCoordinatedByZK); - } - - @Before - public void setup() throws Exception { - this.util = new HBaseTestingUtility(); - setupConf(this.util.getConfiguration()); - startMiniCluster(); - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( - this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false); - serverCrashProcMetrics = this.util.getHBaseCluster().getMaster().getMasterMetrics() - .getServerCrashProcMetrics(); - } - - protected void startMiniCluster() throws Exception { - this.util.startMiniCluster(3); - } - - @After - public void tearDown() throws Exception { - MiniHBaseCluster cluster = this.util.getHBaseCluster(); - HMaster master = cluster == null? null: cluster.getMaster(); - if (master != null && master.getMasterProcedureExecutor() != null) { - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( - master.getMasterProcedureExecutor(), false); - } - this.util.shutdownMiniCluster(); - } - - - @Test - public void testCrashTargetRs() throws Exception { - testRecoveryAndDoubleExecution(false, false); - } - - @Test - public void testRecoveryAndDoubleExecutionOnRsWithMeta() throws Exception { - testRecoveryAndDoubleExecution(true, true); - } - - @Test - public void testRecoveryAndDoubleExecutionOnRsWithoutMeta() throws Exception { - testRecoveryAndDoubleExecution(false, true); - } - - private long getSCPProcId(ProcedureExecutor<?> procExec) { - util.waitFor(30000, () -> !procExec.getProcedures().isEmpty()); - return procExec.getActiveProcIds().stream().mapToLong(Long::longValue).min().getAsLong(); - } - - /** - * Run server crash procedure steps twice to test idempotency and that we are persisting all - * needed state. 
- */ - private void testRecoveryAndDoubleExecution(boolean carryingMeta, boolean doubleExecution) - throws Exception { - final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution-carryingMeta-" - + carryingMeta + "-doubleExecution-" + doubleExecution); - try (Table t = createTable(tableName)) { - // Load the table with a bit of data so some logs to split and some edits in each region. - this.util.loadTable(t, HBaseTestingUtility.COLUMNS[0]); - final int count = util.countRows(t); - assertTrue("expected some rows", count > 0); - final String checksum = util.checksumRows(t); - // Run the procedure executor outside the master so we can mess with it. Need to disable - // Master's running of the server crash processing. - final HMaster master = this.util.getHBaseCluster().getMaster(); - final ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor(); - // find the first server that match the request and executes the test - ServerName rsToKill = null; - for (RegionInfo hri : util.getAdmin().getRegions(tableName)) { - final ServerName serverName = AssignmentTestingUtil.getServerHoldingRegion(util, hri); - if (AssignmentTestingUtil.isServerHoldingMeta(util, serverName) == carryingMeta) { - rsToKill = serverName; - break; - } - } - // Enable test flags and then queue the crash procedure. - ProcedureTestingUtility.waitNoProcedureRunning(procExec); - if (doubleExecution) { - // For SCP, if you enable this then we will enter an infinite loop, as we will crash between - // queue and open for TRSP, and then going back to queue, as we will use the crash rs as the - // target server since it is recored in hbase:meta. - ProcedureTestingUtility.setKillIfHasParent(procExec, false); - ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); - // kill the RS - AssignmentTestingUtil.killRs(util, rsToKill); - long procId = getSCPProcId(procExec); - // Now run through the procedure twice crashing the executor on each step... 
- MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); - } else { - // kill the RS - AssignmentTestingUtil.killRs(util, rsToKill); - long procId = getSCPProcId(procExec); - ProcedureTestingUtility.waitProcedure(procExec, procId); - } - assertReplicaDistributed(t); - assertEquals(count, util.countRows(t)); - assertEquals(checksum, util.checksumRows(t)); - } catch (Throwable throwable) { - LOG.error("Test failed!", throwable); - throw throwable; - } - } - - @Test - public void testConcurrentSCPForSameServer() throws Exception { - final TableName tableName = - TableName.valueOf("testConcurrentSCPForSameServer-" + splitWALCoordinatedByZK); - try (Table t = createTable(tableName)) { - // Load the table with a bit of data so some logs to split and some edits in each region. - this.util.loadTable(t, HBaseTestingUtility.COLUMNS[0]); - final int count = util.countRows(t); - assertTrue("expected some rows", count > 0); - // find the first server that match the request and executes the test - ServerName rsToKill = null; - for (RegionInfo hri : util.getAdmin().getRegions(tableName)) { - final ServerName serverName = AssignmentTestingUtil.getServerHoldingRegion(util, hri); - if (AssignmentTestingUtil.isServerHoldingMeta(util, serverName) == true) { - rsToKill = serverName; - break; - } - } - HMaster master = util.getHBaseCluster().getMaster(); - final ProcedureExecutor<MasterProcedureEnv> pExecutor = master.getMasterProcedureExecutor(); - ServerCrashProcedure procB = - new ServerCrashProcedure(pExecutor.getEnvironment(), rsToKill, false, false); - AssignmentTestingUtil.killRs(util, rsToKill); - long procId = getSCPProcId(pExecutor); - Procedure procA = pExecutor.getProcedure(procId); - LOG.info("submit SCP procedureA"); - util.waitFor(5000, () -> procA.hasLock()); - LOG.info("procedureA acquired the lock"); - assertEquals(Procedure.LockState.LOCK_EVENT_WAIT, - procB.acquireLock(pExecutor.getEnvironment())); - LOG.info("procedureB should not be able to get the lock"); - 
util.waitFor(60000, - () -> procB.acquireLock(pExecutor.getEnvironment()) == Procedure.LockState.LOCK_ACQUIRED); - LOG.info("when procedure B get the lock, procedure A should be finished"); - assertTrue(procA.isFinished()); - } - } - - protected void assertReplicaDistributed(final Table t) { - return; - } - - protected Table createTable(final TableName tableName) throws IOException { - final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS, - HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE); - return t; - } - - private void collectMasterMetrics() { - serverCrashSubmittedCount = serverCrashProcMetrics.getSubmittedCounter().getCount(); - serverCrashFailedCount = serverCrashProcMetrics.getFailedCounter().getCount(); - } - - @Parameterized.Parameters - public static Collection coordinatedByZK() { - return Arrays.asList(false, true); - } -} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedureWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedureWithReplicas.java deleted file mode 100644 index 08446b4..0000000 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedureWithReplicas.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more contributor license - * agreements. See the NOTICE file distributed with this work for additional information regarding - * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. You may obtain a - * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable - * law or agreed to in writing, software distributed under the License is distributed on an "AS IS" - * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License - * for the specific language governing permissions and limitations under the License. - */ -package org.apache.hadoop.hbase.master.procedure; - -import static org.junit.Assert.fail; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hbase.HBaseClassTestRule; -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; -import org.apache.hadoop.hbase.client.Table; -import org.apache.hadoop.hbase.regionserver.Region; -import org.apache.hadoop.hbase.testclassification.LargeTests; -import org.apache.hadoop.hbase.testclassification.MasterTests; -import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; -import org.junit.ClassRule; -import org.junit.experimental.categories.Category; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@Category({ MasterTests.class, LargeTests.class }) -public class TestServerCrashProcedureWithReplicas extends TestServerCrashProcedure { - - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestServerCrashProcedureWithReplicas.class); - private static final Logger LOG = - LoggerFactory.getLogger(TestServerCrashProcedureWithReplicas.class); - - @Override - protected void startMiniCluster() throws Exception { - // Start a cluster with 4 nodes because we have 3 replicas. - // So on a crash of a server still we can ensure that the - // replicas are distributed. - this.util.startMiniCluster(4); - } - - @Override - protected Table createTable(final TableName tableName) throws IOException { - final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS, - HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE, 3); - return t; - } - - protected void assertReplicaDistributed(final Table t) { - // Assert all data came back. 
- List<RegionInfo> regionInfos = new ArrayList<>(); - for (RegionServerThread rs : this.util.getMiniHBaseCluster().getRegionServerThreads()) { - regionInfos.clear(); - for (Region r : rs.getRegionServer().getRegions(t.getName())) { - LOG.info("The region is " + r.getRegionInfo() + " the location is " - + rs.getRegionServer().getServerName()); - if (contains(regionInfos, r.getRegionInfo())) { - LOG.error("Am exiting"); - fail("Crashed replica regions should not be assigned to same region server"); - } else { - regionInfos.add(r.getRegionInfo()); - } - } - } - } - - private boolean contains(List<RegionInfo> regionInfos, RegionInfo regionInfo) { - for (RegionInfo info : regionInfos) { - if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) { - return true; - } - } - return false; - } -} -- 2.7.4