From 7c21d5d01d33ddda25c4186370fd625ec29344cd Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Tue, 13 Aug 2013 13:37:35 -0700 Subject: [PATCH] starting to make Chaos monkey more easily configurable. --- .../hadoop/hbase/DistributedHBaseCluster.java | 3 +- .../hadoop/hbase/IngestIntegrationTestBase.java | 151 --- .../apache/hadoop/hbase/IntegrationTestBase.java | 103 ++ ...IntegrationTestDataIngestSlowDeterministic.java | 118 -- .../IntegrationTestDataIngestWithChaosMonkey.java | 109 -- .../apache/hadoop/hbase/IntegrationTestIngest.java | 179 +++ .../hadoop/hbase/IntegrationTestManyRegions.java | 6 +- ...grationTestRebalanceAndKillServersTargeted.java | 122 -- .../hadoop/hbase/IntegrationTestsDriver.java | 5 +- .../apache/hadoop/hbase/chaos/actions/Action.java | 150 +++ .../hbase/chaos/actions/AddColumnAction.java | 62 + .../hbase/chaos/actions/BatchRestartRsAction.java | 69 ++ .../hbase/chaos/actions/ChangeEncodingAction.java | 68 ++ .../hbase/chaos/actions/ChangeVersionsAction.java | 68 ++ .../actions/CompactRandomRegionOfTableAction.java | 75 ++ .../hbase/chaos/actions/CompactTableAction.java | 63 + .../actions/FlushRandomRegionOfTableAction.java | 62 + .../hbase/chaos/actions/FlushTableAction.java | 54 + .../hbase/chaos/actions/ForceBalancerAction.java | 30 + .../MergeRandomAdjacentRegionsOfTableAction.java | 68 ++ .../actions/MoveRandomRegionOfTableAction.java | 62 + .../chaos/actions/MoveRegionsOfTableAction.java | 71 ++ .../hbase/chaos/actions/RemoveColumnAction.java | 69 ++ .../chaos/actions/RestartActionBaseAction.java | 54 + .../chaos/actions/RestartActiveMasterAction.java | 37 + .../hbase/chaos/actions/RestartRandomRsAction.java | 39 + .../chaos/actions/RestartRsHoldingMetaAction.java | 40 + .../chaos/actions/RestartRsHoldingTableAction.java | 57 + .../chaos/actions/RollingBatchRestartRsAction.java | 70 ++ .../hbase/chaos/actions/SnapshotTableAction.java | 52 + .../actions/SplitRandomRegionOfTableAction.java | 62 + .../actions/UnbalanceKillAndRebalanceAction.java | 49 + .../chaos/actions/UnbalanceRegionsAction.java | 60 + .../hbase/chaos/factories/CalmMonkeyFactory.java | 33 + .../hbase/chaos/factories/MonkeyFactory.java | 72 ++ .../factories/SlowDeterministicMonkeyFactory.java | 91 ++ .../chaos/factories/UnbalanceMonkeyFactory.java | 41 + .../hbase/chaos/monkies/CalmChaosMonkey.java | 27 + .../hadoop/hbase/chaos/monkies/ChaosMonkey.java | 15 + .../chaos/monkies/PolicyBasedChaosMonkey.java | 162 +++ .../chaos/policies/CompositeSequentialPolicy.java | 53 + .../hbase/chaos/policies/DoActionsOncePolicy.java | 64 ++ .../hbase/chaos/policies/PeriodicPolicy.java | 59 + .../chaos/policies/PeriodicRandomActionPolicy.java | 73 ++ .../apache/hadoop/hbase/chaos/policies/Policy.java | 48 + .../hbase/mapreduce/IntegrationTestBulkLoad.java | 82 +- .../hbase/mapreduce/IntegrationTestImportTsv.java | 10 +- .../hadoop/hbase/mttr/IntegrationTestMTTR.java | 46 +- .../hbase/test/IntegrationTestBigLinkedList.java | 84 +- ...ntegrationTestBigLinkedListWithChaosMonkey.java | 143 --- .../hbase/test/IntegrationTestLoadAndVerify.java | 76 +- .../org/apache/hadoop/hbase/util/ChaosMonkey.java | 1213 -------------------- .../org/apache/hadoop/hbase/util/LoadTestTool.java | 2 +- 53 files changed, 2677 insertions(+), 2004 deletions(-) delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IngestIntegrationTestBase.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java delete mode 100644 
hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java create mode 100644 
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/CompositeSequentialPolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java index 9f8789e..c61f590 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase; import java.io.IOException; import java.util.HashMap; +import com.google.common.collect.Sets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterManager.ServiceType; @@ -35,8 +36,6 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; -import com.google.common.collect.Sets; - /** * Manages the interactions with an already deployed distributed cluster (as opposed to * a pseudo-distributed, or mini/local cluster). This is used by integration and system tests. diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IngestIntegrationTestBase.java hbase-it/src/test/java/org/apache/hadoop/hbase/IngestIntegrationTestBase.java deleted file mode 100644 index cee90e7..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IngestIntegrationTestBase.java +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.LoadTestTool; -import org.junit.Assert; - -/** - * A base class for tests that do something with the cluster while running - * {@link LoadTestTool} to write and verify some data. - */ -public abstract class IngestIntegrationTestBase { - protected static String tableName = null; - - /** A soft limit on how long we should run */ - private static final String RUN_TIME_KEY = "hbase.%s.runtime"; - - protected static final Log LOG = LogFactory.getLog(IngestIntegrationTestBase.class); - protected IntegrationTestingUtility util; - protected HBaseCluster cluster; - private LoadTestTool loadTool; - - protected void setUp(int numSlavesBase, Configuration conf) throws Exception { - tableName = this.getClass().getSimpleName(); - util = getTestingUtil(conf); - LOG.debug("Initializing/checking cluster has " + numSlavesBase + " servers"); - util.initializeCluster(numSlavesBase); - LOG.debug("Done initializing/checking cluster"); - cluster = util.getHBaseClusterInterface(); - deleteTableIfNecessary(); - loadTool = new LoadTestTool(); - loadTool.setConf(util.getConfiguration()); - // Initialize load test tool before we start breaking things; - // LoadTestTool init, even when it is a no-op, is very fragile. - int ret = loadTool.run(new String[] { "-tn", tableName, "-init_only" }); - Assert.assertEquals("Failed to initialize LoadTestTool", 0, ret); - } - - protected IntegrationTestingUtility getTestingUtil(Configuration conf) { - if (this.util == null) { - if (conf == null) { - this.util = new IntegrationTestingUtility(); - } else { - this.util = new IntegrationTestingUtility(conf); - } - } - return util; - } - - protected void setUp(int numSlavesBase) throws Exception { - setUp(numSlavesBase, null); - } - - protected void tearDown() throws Exception { - LOG.debug("Restoring the cluster"); - util.restoreCluster(); - LOG.debug("Done restoring the cluster"); - } - - private void deleteTableIfNecessary() throws IOException { - if (util.getHBaseAdmin().tableExists(tableName)) { - util.deleteTable(Bytes.toBytes(tableName)); - } - } - - protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, - int colsPerKey, int recordSize, int writeThreads) throws Exception { - LOG.info("Running ingest"); - LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize()); - - long start = System.currentTimeMillis(); - String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName()); - long runtime = util.getConfiguration().getLong(runtimeKey, defaultRunTime); - long startKey = 0; - - long numKeys = getNumKeys(keysPerServerPerIter); - while (System.currentTimeMillis() - start < 0.9 * runtime) { - LOG.info("Intended run time: " + (runtime/60000) + " min, left:" + - ((runtime - (System.currentTimeMillis() - start))/60000) + " min"); - - int ret = loadTool.run(new String[] { - "-tn", tableName, - "-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), - "-start_key", String.valueOf(startKey), - "-num_keys", String.valueOf(numKeys), - "-skip_init" - }); - if (0 != ret) { - String errorMsg = "Load failed with error code " + ret; - LOG.error(errorMsg); - Assert.fail(errorMsg); - } - - ret = loadTool.run(new String[] { - "-tn", tableName, - "-update", 
String.format("60:%d", writeThreads), - "-start_key", String.valueOf(startKey), - "-num_keys", String.valueOf(numKeys), - "-skip_init" - }); - if (0 != ret) { - String errorMsg = "Update failed with error code " + ret; - LOG.error(errorMsg); - Assert.fail(errorMsg); - } - - ret = loadTool.run(new String[] { - "-tn", tableName, - "-read", "100:20", - "-start_key", String.valueOf(startKey), - "-num_keys", String.valueOf(numKeys), - "-skip_init" - }); - if (0 != ret) { - String errorMsg = "Verification failed with error code " + ret; - LOG.error(errorMsg); - Assert.fail(errorMsg); - } - startKey += numKeys; - } - } - - /** Estimates a data size based on the cluster size */ - private long getNumKeys(int keysPerServer) - throws IOException { - int numRegionServers = cluster.getClusterStatus().getServersSize(); - return keysPerServer * numRegionServers; - } -} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java new file mode 100644 index 0000000..ebb87e7 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestBase.java @@ -0,0 +1,103 @@ +package org.apache.hadoop.hbase; + +import java.util.Set; + +import org.apache.commons.cli.CommandLine; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.chaos.factories.MonkeyFactory; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; +import org.apache.hadoop.hbase.util.AbstractHBaseTool; +import org.junit.After; +import org.junit.Before; + +/** + * Base class for HBase integration tests that want to use the Chaos Monkey. + */ +public abstract class IntegrationTestBase extends AbstractHBaseTool { + public static final String LONG_OPT = "monkey"; + + protected IntegrationTestingUtility util; + protected ChaosMonkey monkey; + protected String monkeyToUse; + + public IntegrationTestBase() { + this(MonkeyFactory.CALM); + } + + public IntegrationTestBase(String monkeyToUse) { + this.monkeyToUse = monkeyToUse; + } + + @Override + protected void addOptions() { + addOptWithArg("m", LONG_OPT, "Which chaos monkey to run"); + } + + @Override + protected void processOptions(CommandLine cmd) { + if (cmd.hasOption(LONG_OPT)) { + monkeyToUse = cmd.getOptionValue(LONG_OPT); + } + } + + @Override + public Configuration getConf() { + Configuration c = super.getConf(); + if (c == null && util != null) { + conf = util.getConfiguration(); + c = conf; + } + return c; + } + + @Override + protected int doWork() throws Exception { + setUpMonkey(); + setUp(); + int result = -1; + try { + runTestFromCommandLine(); + } finally { + cleanUpMonkey(); + cleanUp(); + } + + return result; + } + + @Before + public void setUpMonkey() throws Exception { + util = getTestingUtil(getConf()); + MonkeyFactory fact = MonkeyFactory.getFactory(monkeyToUse); + monkey = fact.setUtil(util) + .setTableName(getTablename()) + .setColumnFamilies(getColumnFamilies()).build(); + } + + @After + public void cleanUpMonkey() throws Exception { + monkey.stop("Ending test"); + monkey.waitForStop(); + } + + protected IntegrationTestingUtility getTestingUtil(Configuration conf) { + if (this.util == null) { + if (conf == null) { + this.util = new IntegrationTestingUtility(); + } else { + this.util = new IntegrationTestingUtility(conf); + } + } + return util; + } + + public abstract void setUp() throws Exception; + + public abstract void cleanUp() throws Exception; + + public abstract int runTestFromCommandLine() throws Exception; + + public abstract String 
getTablename(); + + protected abstract Set<String> getColumnFamilies(); +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java deleted file mode 100644 index a3ab48c..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase; - -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.ChaosMonkey.Action; -import org.apache.hadoop.hbase.util.ChaosMonkey.BatchRestartRs; -import org.apache.hadoop.hbase.util.ChaosMonkey.CompactRandomRegionOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.CompactTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.CompositeSequentialPolicy; -import org.apache.hadoop.hbase.util.ChaosMonkey.DoActionsOncePolicy; -import org.apache.hadoop.hbase.util.ChaosMonkey.FlushRandomRegionOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.FlushTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.MergeRandomAdjacentRegionsOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.MoveRandomRegionOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.MoveRegionsOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.PeriodicRandomActionPolicy; -import org.apache.hadoop.hbase.util.ChaosMonkey.RestartActiveMaster; -import org.apache.hadoop.hbase.util.ChaosMonkey.RestartRandomRs; -import org.apache.hadoop.hbase.util.ChaosMonkey.RestartRsHoldingMeta; -import org.apache.hadoop.hbase.util.ChaosMonkey.RollingBatchRestartRs; -import org.apache.hadoop.hbase.util.ChaosMonkey.SnapshotTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.SplitRandomRegionOfTable; -import org.apache.hadoop.hbase.util.LoadTestTool; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -/** - * A system test which does large data ingestion and verify using {@link LoadTestTool}. - * It performs a set of actions deterministically using ChaosMonkey, then starts killing - * things randomly. You can configure how long should the load test run by using - * "hbase.IntegrationTestDataIngestSlowDeterministic.runtime" configuration parameter.
- */ -@Category(IntegrationTests.class) -public class IntegrationTestDataIngestSlowDeterministic extends IngestIntegrationTestBase { - private static final int SERVER_COUNT = 4; // number of slaves for the smallest cluster - private static final long DEFAULT_RUN_TIME = 20 * 60 * 1000; - - private ChaosMonkey monkey; - - @Before - public void setUp() throws Exception { - super.setUp(SERVER_COUNT); - - // Actions such as compact/flush a table/region, - // move one region around. They are not so destructive, - // can be executed more frequently. - Action[] actions1 = new Action[] { - new CompactTable(tableName, 0.5f), - new CompactRandomRegionOfTable(tableName, 0.6f), - new FlushTable(tableName), - new FlushRandomRegionOfTable(tableName), - new MoveRandomRegionOfTable(tableName) - }; - - // Actions such as split/merge/snapshot. - // They should not cause data loss, or unreliability - // such as region stuck in transition. - Action[] actions2 = new Action[] { - new SplitRandomRegionOfTable(tableName), - new MergeRandomAdjacentRegionsOfTable(tableName), - new SnapshotTable(tableName), - new ChaosMonkey.AddColumnAction(tableName), - new ChaosMonkey.RemoveColumnAction(tableName), - new ChaosMonkey.ChangeEncodingAction(tableName), - new ChaosMonkey.ChangeVersionsAction(tableName) - }; - - // Destructive actions to mess things around. - Action[] actions3 = new Action[] { - new MoveRegionsOfTable(tableName), - new RestartRandomRs(60000), - new BatchRestartRs(5000, 0.5f), - new RestartActiveMaster(5000), - new RollingBatchRestartRs(5000, 1.0f), - new RestartRsHoldingMeta(35000) - }; - - monkey = new ChaosMonkey(util, - new PeriodicRandomActionPolicy(60 * 1000, actions1), - new PeriodicRandomActionPolicy(90 * 1000, actions2), - new CompositeSequentialPolicy( - new DoActionsOncePolicy(150 * 1000, actions3), - new PeriodicRandomActionPolicy(150 * 1000, actions3))); - monkey.start(); - } - - @After - public void tearDown() throws Exception { - if (monkey != null) { - monkey.stop("tearDown"); - monkey.waitForStop(); - } - super.tearDown(); - } - - @Test - public void testDataIngest() throws Exception { - runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 1024, 10); - } -} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java deleted file mode 100644 index 9bc0e56..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.ChaosMonkey.*; -import org.apache.hadoop.hbase.util.LoadTestTool; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -/** - * A system test which does large data ingestion and verify using {@link LoadTestTool}, - * while killing the region servers and the master(s) randomly. You can configure how long - * should the load test run by using "hbase.IntegrationTestDataIngestWithChaosMonkey.runtime" - * configuration parameter. - */ -@Category(IntegrationTests.class) -public class IntegrationTestDataIngestWithChaosMonkey extends IngestIntegrationTestBase { - - private static int NUM_SLAVES_BASE = 4; //number of slaves for the smallest cluster - - // run for 10 min by default - private static final long DEFAULT_RUN_TIME = 10 * 60 * 1000; - - private ChaosMonkey monkey; - - @Before - public void setUp() throws Exception { - util= getTestingUtil(null); - Configuration conf = util.getConfiguration(); - if (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, - HConstants.DEFAULT_DISTRIBUTED_LOG_REPLAY_CONFIG)) { - // when distributedLogReplay is enabled, we need to make sure rpc timeout & retires are - // smaller enough in order for the replay can complete before ChaosMonkey kills another region - // server - conf.setInt("hbase.log.replay.retries.number", 2); - conf.setInt("hbase.log.replay.rpc.timeout", 2000); - conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true); - } - if(!util.isDistributedCluster()) { - NUM_SLAVES_BASE = 5; - } - super.setUp(NUM_SLAVES_BASE); - - // Actions such as compact/flush a table/region, - // move one region around. They are not so destructive, - // can be executed more frequently. - Action[] actions1 = new Action[] { - new CompactTable(tableName, 0.5f), - new CompactRandomRegionOfTable(tableName, 0.6f), - new FlushTable(tableName), - new FlushRandomRegionOfTable(tableName), - new MoveRandomRegionOfTable(tableName) - }; - - // Actions such as split/merge/snapshot. - // They should not cause data loss, or unreliability - // such as region stuck in transition. 
- Action[] actions2 = new Action[] { - new SplitRandomRegionOfTable(tableName), - new MergeRandomAdjacentRegionsOfTable(tableName), - new SnapshotTable(tableName), - new MoveRegionsOfTable(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName), - new ChangeEncodingAction(tableName), - new ChangeVersionsAction(tableName) - }; - - monkey = new ChaosMonkey(util, - new PeriodicRandomActionPolicy(30 * 1000, actions1), - new PeriodicRandomActionPolicy(60 * 1000, actions2), - ChaosMonkey.getPolicyByName(ChaosMonkey.EVERY_MINUTE_RANDOM_ACTION_POLICY)); - monkey.start(); - } - - @After - public void tearDown() throws Exception { - if (monkey != null) { - monkey.stop("test has finished, that's why"); - monkey.waitForStop(); - } - super.tearDown(); - } - - @Test - public void testDataIngest() throws Exception { - runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 100, 20); - } -} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java new file mode 100644 index 0000000..be72544 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase; + +import java.io.IOException; +import java.util.Set; + +import com.google.common.collect.Sets; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.LoadTestTool; +import org.apache.hadoop.util.ToolRunner; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * A base class for tests that do something with the cluster while running + * {@link LoadTestTool} to write and verify some data. 
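+ * + * <p>A sketch of standalone use (the <code>-m</code>/<code>--monkey</code> option is defined by + * {@link IntegrationTestBase}; the monkey name below is illustrative, see + * {@link org.apache.hadoop.hbase.chaos.factories.MonkeyFactory} for the names that are actually + * registered): + * <pre> + * hbase org.apache.hadoop.hbase.IntegrationTestIngest -m slowDeterministic + * </pre>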
+ */ +@Category(IntegrationTests.class) +public class IntegrationTestIngest extends IntegrationTestBase { + private static final int SERVER_COUNT = 4; // number of slaves for the smallest cluster + private static final long DEFAULT_RUN_TIME = 20 * 60 * 1000; + + protected static String tableName = null; + + /** A soft limit on how long we should run */ + private static final String RUN_TIME_KEY = "hbase.%s.runtime"; + + protected static final Log LOG = LogFactory.getLog(IntegrationTestIngest.class); + protected IntegrationTestingUtility util; + protected HBaseCluster cluster; + private LoadTestTool loadTool; + + protected void setUp(int numSlavesBase) throws Exception { + tableName = this.getClass().getSimpleName(); + util = getTestingUtil(null); + LOG.debug("Initializing/checking cluster has " + numSlavesBase + " servers"); + util.initializeCluster(numSlavesBase); + LOG.debug("Done initializing/checking cluster"); + cluster = util.getHBaseClusterInterface(); + deleteTableIfNecessary(); + loadTool = new LoadTestTool(); + loadTool.setConf(util.getConfiguration()); + // Initialize load test tool before we start breaking things; + // LoadTestTool init, even when it is a no-op, is very fragile. + int ret = loadTool.run(new String[] { "-tn", tableName, "-init_only" }); + Assert.assertEquals("Failed to initialize LoadTestTool", 0, ret); + } + + @Override + public void setUp() throws Exception { + setUp(SERVER_COUNT); + } + + @Override + public void cleanUp() throws Exception { + LOG.debug("Restoring the cluster"); + util.restoreCluster(); + LOG.debug("Done restoring the cluster"); + } + + @Override + public int runTestFromCommandLine() throws Exception { + internalRunIngestTest(); + return 0; + } + + @Test + public void internalRunIngestTest() throws Exception { + runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 1024, 10); + } + + @Override + public String getTablename() { + return tableName; + } + + @Override + protected Set<String> getColumnFamilies() { + return Sets.newHashSet(Bytes.toString(LoadTestTool.COLUMN_FAMILY)); + } + + private void deleteTableIfNecessary() throws IOException { + if (util.getHBaseAdmin().tableExists(tableName)) { + util.deleteTable(Bytes.toBytes(tableName)); + } + } + + protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, + int colsPerKey, int recordSize, int writeThreads) throws Exception { + LOG.info("Running ingest"); + LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize()); + + long start = System.currentTimeMillis(); + String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName()); + long runtime = util.getConfiguration().getLong(runtimeKey, defaultRunTime); + long startKey = 0; + + long numKeys = getNumKeys(keysPerServerPerIter); + while (System.currentTimeMillis() - start < 0.9 * runtime) { + LOG.info("Intended run time: " + (runtime/60000) + " min, left:" + + ((runtime - (System.currentTimeMillis() - start))/60000) + " min"); + + int ret = loadTool.run(new String[] { + "-tn", tableName, + "-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), + "-start_key", String.valueOf(startKey), + "-num_keys", String.valueOf(numKeys), + "-skip_init" + }); + if (0 != ret) { + String errorMsg = "Load failed with error code " + ret; + LOG.error(errorMsg); + Assert.fail(errorMsg); + } + + ret = loadTool.run(new String[] { + "-tn", tableName, + "-update", String.format("60:%d", writeThreads), + "-start_key", String.valueOf(startKey), + "-num_keys", String.valueOf(numKeys), + "-skip_init" +
}); + if (0 != ret) { + String errorMsg = "Update failed with error code " + ret; + LOG.error(errorMsg); + Assert.fail(errorMsg); + } + + ret = loadTool.run(new String[] { + "-tn", tableName, + "-read", "100:20", + "-start_key", String.valueOf(startKey), + "-num_keys", String.valueOf(numKeys), + "-skip_init" + }); + if (0 != ret) { + String errorMsg = "Verification failed with error code " + ret; + LOG.error(errorMsg); + Assert.fail(errorMsg); + } + startKey += numKeys; + } + } + + /** Estimates a data size based on the cluster size */ + private long getNumKeys(int keysPerServer) + throws IOException { + int numRegionServers = cluster.getClusterStatus().getServersSize(); + return keysPerServer * numRegionServers; + } + + public static void main(String[] args) throws Exception { + Configuration conf = HBaseConfiguration.create(); + IntegrationTestingUtility.setUseDistributedCluster(conf); + int ret = ToolRunner.run(conf, new IntegrationTestIngest(), args); + System.exit(ret); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestManyRegions.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestManyRegions.java index beded5e..d1587fa 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestManyRegions.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestManyRegions.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hbase; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -35,6 +32,9 @@ import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + /** * An integration test to detect regressions in HBASE-7220. Create * a table with many regions and verify it completes within a diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java deleted file mode 100644 index d302ebe..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -import org.apache.commons.lang.math.RandomUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.client.HConnectionManager; -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.LoadTestTool; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -/** - * A system test which does large data ingestion and verify using {@link LoadTestTool}, - * while introducing chaos by hoarding many regions into few servers (unbalancing), then - * killing some of these servers, and triggering balancer. - * It's configured using a set of constants on top, which cover this scenario and are - * reasonable for minicluster. See constants if you want to tweak the test. - * You can configure how long the test should run by using - * "hbase.IntegrationTestRebalanceAndKillServersTargeted.runtime" configuration parameter, - * which is probably most useful on cluster. - */ -@Category(IntegrationTests.class) -public class IntegrationTestRebalanceAndKillServersTargeted extends IngestIntegrationTestBase { - private static final int NUM_SLAVES_BASE = 4; // number of slaves for the smallest cluster - private static final long DEFAULT_RUN_TIME = 5 * 60 * 1000; // run for 5 min by default - - /** How often to introduce the chaos. If too frequent, sequence of kills on minicluster - * can cause test to fail when Put runs out of retries. */ - private static final long CHAOS_EVERY_MS = 65 * 1000; - - private ChaosMonkey monkey; - - /** This action is too specific to put in ChaosMonkey; put it here */ - static class UnbalanceKillAndRebalanceAction extends ChaosMonkey.Action { - /** Fractions of servers to get regions and live and die respectively; from all other - * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */ - private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1; - private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1; - private static final double HOARD_FRC_OF_REGIONS = 0.8; - /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance - * and restarting the servers; to make sure these events have time to impact the cluster. 
*/ - private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000; - private static final long WAIT_FOR_KILLS_MS = 2 * 1000; - private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000; - - @Override - public void perform() throws Exception { - ClusterStatus status = this.cluster.getClusterStatus(); - List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers()); - int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); - int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); - Assert.assertTrue((liveCount + deadCount) < victimServers.size()); - List<ServerName> targetServers = new ArrayList<ServerName>(liveCount); - for (int i = 0; i < liveCount + deadCount; ++i) { - int victimIx = RandomUtils.nextInt(victimServers.size()); - targetServers.add(victimServers.remove(victimIx)); - } - unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); - Thread.sleep(WAIT_FOR_UNBALANCE_MS); - for (int i = 0; i < liveCount; ++i) { - killRs(targetServers.get(i)); - } - Thread.sleep(WAIT_FOR_KILLS_MS); - forceBalancer(); - Thread.sleep(WAIT_AFTER_BALANCE_MS); - for (int i = 0; i < liveCount; ++i) { - startRs(targetServers.get(i)); - } - } - } - - @Before - public void setUp() throws Exception { - Configuration conf = HBaseConfiguration.create(); - conf.set(HConnectionManager.RETRIES_BY_SERVER_KEY, "true"); - super.setUp(NUM_SLAVES_BASE, conf); - - ChaosMonkey.Policy chaosPolicy = new ChaosMonkey.PeriodicRandomActionPolicy( - CHAOS_EVERY_MS, new UnbalanceKillAndRebalanceAction()); - monkey = new ChaosMonkey(util, chaosPolicy); - monkey.start(); - } - - @After - public void tearDown() throws Exception { - if (monkey != null) { - monkey.stop("tearDown"); - monkey.waitForStop(); - } - super.tearDown(); - } - - // Disabled until we fix hbase-7520 - @Test - public void testDataIngest() throws Exception { - runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 100, 20); - } -} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestsDriver.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestsDriver.java index c851b20..d3f41ad 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestsDriver.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestsDriver.java @@ -23,15 +23,14 @@ import java.util.Set; import java.util.regex.Pattern; import org.apache.commons.cli.CommandLine; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.util.AbstractHBaseTool; import org.apache.hadoop.util.ToolRunner; import org.junit.internal.TextListener; import org.junit.runner.JUnitCore; import org.junit.runner.Result; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - /** * This class drives the Integration test suite execution. Executes all * tests having @Category(IntegrationTests.class) annotation against an diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java new file mode 100644 index 0000000..f9c3383 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.HBaseCluster; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.ServerLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * A (possibly mischievous) action that the ChaosMonkey can perform. + */ +public class Action { + + protected static Log LOG = LogFactory.getLog(Action.class); + + protected ActionContext context; + protected HBaseCluster cluster; + protected ClusterStatus initialStatus; + protected ServerName[] initialServers; + + public void init(ActionContext context) throws IOException { + this.context = context; + cluster = context.getHBaseCluster(); + initialStatus = cluster.getInitialClusterStatus(); + Collection<ServerName> regionServers = initialStatus.getServers(); + initialServers = regionServers.toArray(new ServerName[regionServers.size()]); + } + + public void perform() throws Exception { } + + /** Returns current region servers */ + protected ServerName[] getCurrentServers() throws IOException { + Collection<ServerName> regionServers = cluster.getClusterStatus().getServers(); + if (regionServers == null || regionServers.size() <= 0) return new ServerName[] {}; + return regionServers.toArray(new ServerName[regionServers.size()]); + } + + protected void killMaster(ServerName server) throws IOException { + LOG.info("Killing master:" + server); + cluster.killMaster(server); + cluster.waitForMasterToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Killed master server:" + server); + } + + protected void startMaster(ServerName server) throws IOException { + LOG.info("Starting master:" + server.getHostname()); + cluster.startMaster(server.getHostname()); + cluster.waitForActiveAndReadyMaster(PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Started master: " + server); + } + + protected void killRs(ServerName server) throws IOException { + LOG.info("Killing region server:" + server); + cluster.killRegionServer(server); + cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Killed region server:" + server + ". Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + } + + protected void startRs(ServerName server) throws IOException { + LOG.info("Starting region server:" + server.getHostname()); + cluster.startRegionServer(server.getHostname()); + cluster.waitForRegionServerToStart(server.getHostname(), PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Started region server:" + server + ". Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + } + + protected void unbalanceRegions(ClusterStatus clusterStatus, + List<ServerName> fromServers, List<ServerName> toServers, + double fractionOfRegions) throws Exception { + List<byte[]> victimRegions = new LinkedList<byte[]>(); + for (ServerName server : fromServers) { + ServerLoad serverLoad = clusterStatus.getLoad(server); + // Ugh. + List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet()); + int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); + LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); + for (int i = 0; i < victimRegionCount; ++i) { + int victimIx = RandomUtils.nextInt(regions.size()); + String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); + victimRegions.add(Bytes.toBytes(regionId)); + } + } + + LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() + + " servers to " + toServers.size() + " different servers"); + HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + for (byte[] victimRegion : victimRegions) { + int targetIx = RandomUtils.nextInt(toServers.size()); + admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); + } + } + + protected void forceBalancer() throws Exception { + HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + boolean result = admin.balancer(); + if (!result) { + LOG.error("Balancer didn't succeed"); + } + } + + /** + * Context for Actions. + */ + public static class ActionContext { + private IntegrationTestingUtility util; + + public ActionContext(IntegrationTestingUtility util) { + this.util = util; + } + + public IntegrationTestingUtility getHaseIntegrationTestingUtility() { + return util; + } + + public HBaseCluster getHBaseCluster() { + return util.getHBaseClusterInterface(); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java new file mode 100644 index 0000000..82afed9 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; +import java.util.Random; + +import org.apache.commons.lang.RandomStringUtils; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Action that adds a column family to a table. + */ +public class AddColumnAction extends Action { + private final Random random; + private final byte[] tableName; + private HBaseAdmin admin; + + public AddColumnAction(String tableName) { + this.tableName = Bytes.toBytes(tableName); + this.random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor columnDescriptor = null; + + while (columnDescriptor == null || + tableDescriptor.getFamily(columnDescriptor.getName()) != null) { + columnDescriptor = new HColumnDescriptor(RandomStringUtils.randomAlphabetic(5)); + } + + tableDescriptor.addFamily(columnDescriptor); + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java new file mode 100644 index 0000000..edfd9c4 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.util.List; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; + +/** + * Restarts a ratio of the running regionservers at the same time. + */ +public class BatchRestartRsAction extends RestartActionBaseAction { + float ratio; // ratio of regionservers to restart + + public BatchRestartRsAction(long sleepTime, float ratio) { + super(sleepTime); + this.ratio = ratio; + } + + @Override + public void perform() throws Exception { + LOG.info(String.format("Performing action: Batch restarting %d%% of region servers", + (int)(ratio * 100))); + List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), + ratio); + + for (ServerName server : selectedServers) { + LOG.info("Killing region server:" + server); + cluster.killRegionServer(server); + } + + for (ServerName server : selectedServers) { + cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + } + + LOG.info("Killed " + selectedServers.size() + " region servers. Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + + sleep(sleepTime); + + for (ServerName server : selectedServers) { + LOG.info("Starting region server:" + server.getHostname()); + cluster.startRegionServer(server.getHostname()); + } + for (ServerName server : selectedServers) { + cluster.waitForRegionServerToStart(server.getHostname(), PolicyBasedChaosMonkey.TIMEOUT); + } + LOG.info("Started " + selectedServers.size() + " region servers. Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java new file mode 100644 index 0000000..9fc8e58 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Action that changes the data block encoding of the column families of a table.
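+ * + * <p>The hard-coded ids in {@link #perform()} are believed to correspond to the + * {@link org.apache.hadoop.hbase.io.encoding.DataBlockEncoding} values NONE (0), PREFIX (2), + * DIFF (3), FAST_DIFF (4) and PREFIX_TREE (6); ids 1 and 5 have no encoding registered and are + * therefore skipped.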
+ */ +public class ChangeEncodingAction extends Action { + private final byte[] tableName; + + private HBaseAdmin admin; + private Random random; + + public ChangeEncodingAction(String tableName) { + this.tableName = Bytes.toBytes(tableName); + this.random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); + + if (columnDescriptors == null || columnDescriptors.length == 0) { + return; + } + + // possible DataBlockEncoding ids + int[] possibleIds = {0, 2, 3, 4, 6}; + for (HColumnDescriptor descriptor : columnDescriptors) { + short id = (short) possibleIds[random.nextInt(possibleIds.length)]; + descriptor.setDataBlockEncoding(DataBlockEncoding.getEncodingById(id)); + } + + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java new file mode 100644 index 0000000..499c501 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Action that changes the number of versions on the column families of a table. + * + * Always keeps at least 1 as the number of versions.
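+ * + * <p>Concretely, {@link #perform()} draws one version count in the range [1, 3] and applies it + * as both the minimum and the maximum number of versions of every column family in the table.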
+ */ +public class ChangeVersionsAction extends Action { + private final byte[] tableName; + + private HBaseAdmin admin; + private Random random; + + public ChangeVersionsAction(String tableName) { + this.tableName = Bytes.toBytes(tableName); + this.random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); + + if (columnDescriptors == null || columnDescriptors.length == 0) { + return; + } + + int versions = random.nextInt(3) + 1; + for (HColumnDescriptor descriptor : columnDescriptors) { + descriptor.setMaxVersions(versions); + descriptor.setMinVersions(versions); + } + + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java new file mode 100644 index 0000000..cba06dc --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.util.List; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Action that queues a compaction of a random region of a table.
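+ * + * <p>A construction sketch, roughly as a monkey factory might wire it up (the table name and + * ratio here are illustrative only): + * <pre> + * // about 60% of invocations request a major compaction, the rest a minor one + * new CompactRandomRegionOfTableAction("usertable", 0.6f); + * </pre>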
+ */
+public class CompactRandomRegionOfTableAction extends Action {
+  private final byte[] tableNameBytes;
+  private final int majorRatio;
+  private final long sleepTime;
+  private final String tableName;
+
+  public CompactRandomRegionOfTableAction(
+      String tableName, float majorRatio) {
+    this(-1, tableName, majorRatio);
+  }
+
+  public CompactRandomRegionOfTableAction(
+      int sleepTime, String tableName, float majorRatio) {
+    this.tableNameBytes = Bytes.toBytes(tableName);
+    this.majorRatio = (int) (100 * majorRatio);
+    this.sleepTime = sleepTime;
+    this.tableName = tableName;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
+    HBaseAdmin admin = util.getHBaseAdmin();
+    List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+    boolean major = RandomUtils.nextInt(100) < majorRatio;
+
+    LOG.info("Performing action: Compact random region of table "
+      + tableName + ", major=" + major);
+    HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
+      regions.toArray(new HRegionInfo[regions.size()]));
+
+    if (major) {
+      LOG.debug("Major compacting region " + region.getRegionNameAsString());
+      admin.majorCompact(region.getRegionName());
+    } else {
+      LOG.debug("Compacting region " + region.getRegionNameAsString());
+      admin.compact(region.getRegionName());
+    }
+    if (sleepTime > 0) {
+      Thread.sleep(sleepTime);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java
new file mode 100644
index 0000000..0fae29b
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+* Action that queues a compaction of a table.
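+* With probability majorRatio a major rather than a minor compaction is requested.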
+*/ +public class CompactTableAction extends Action { + private final byte[] tableNameBytes; + private final int majorRatio; + private final long sleepTime; + private final String tableName; + + public CompactTableAction(String tableName, float majorRatio) { + this(-1, tableName, majorRatio); + } + + public CompactTableAction( + int sleepTime, String tableName, float majorRatio) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.majorRatio = (int) (100 * majorRatio); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + boolean major = RandomUtils.nextInt(100) < majorRatio; + + LOG.info("Performing action: Compact table " + tableName + ", major=" + major); + if (major) { + admin.majorCompact(tableNameBytes); + } else { + admin.compact(tableNameBytes); + } + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java new file mode 100644 index 0000000..c4ac4d8 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.util.List; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** +* Action that tries to flush a random region of a table. 
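+* The region to flush is chosen uniformly at random from the current regions of the table.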
+*/
+public class FlushRandomRegionOfTableAction extends Action {
+  private final byte[] tableNameBytes;
+  private final long sleepTime;
+  private final String tableName;
+
+  public FlushRandomRegionOfTableAction(String tableName) {
+    this(-1, tableName);
+  }
+
+  public FlushRandomRegionOfTableAction(int sleepTime, String tableName) {
+    this.tableNameBytes = Bytes.toBytes(tableName);
+    this.sleepTime = sleepTime;
+    this.tableName = tableName;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
+    HBaseAdmin admin = util.getHBaseAdmin();
+
+    LOG.info("Performing action: Flush random region of table " + tableName);
+    List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+    HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
+      regions.toArray(new HRegionInfo[regions.size()]));
+    LOG.debug("Flushing region " + region.getRegionNameAsString());
+    admin.flush(region.getRegionName());
+    if (sleepTime > 0) {
+      Thread.sleep(sleepTime);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java
new file mode 100644
index 0000000..5e6ec1d
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+* Action that tries to flush a table.
+*/ +public class FlushTableAction extends Action { + private final byte[] tableNameBytes; + private final long sleepTime; + private final String tableName; + + public FlushTableAction(String tableName) { + this(-1, tableName); + } + + public FlushTableAction(int sleepTime, String tableName) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Flush table " + tableName); + admin.flush(tableNameBytes); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java new file mode 100644 index 0000000..9909c6e --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +/** +* Action that tries to force a balancer run. +*/ +public class ForceBalancerAction extends Action { + @Override + public void perform() throws Exception { + LOG.info("Balancing regions"); + forceBalancer(); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java new file mode 100644 index 0000000..0563c05 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.List;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Action to merge two random adjacent regions of a table.
+ */
+public class MergeRandomAdjacentRegionsOfTableAction extends Action {
+  private final byte[] tableNameBytes;
+  private final String tableName;
+  private final long sleepTime;
+
+  public MergeRandomAdjacentRegionsOfTableAction(String tableName) {
+    this(-1, tableName);
+  }
+
+  public MergeRandomAdjacentRegionsOfTableAction(int sleepTime, String tableName) {
+    this.tableNameBytes = Bytes.toBytes(tableName);
+    this.tableName = tableName;
+    this.sleepTime = sleepTime;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
+    HBaseAdmin admin = util.getHBaseAdmin();
+
+    LOG.info("Performing action: Merge random adjacent regions of table " + tableName);
+    List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+    if (regions.size() < 2) {
+      LOG.info("Table " + tableName + " doesn't have enough regions to merge");
+      return;
+    }
+
+    int i = RandomUtils.nextInt(regions.size() - 1);
+    HRegionInfo a = regions.get(i++);
+    HRegionInfo b = regions.get(i);
+    LOG.debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString());
+    admin.mergeRegions(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false);
+    if (sleepTime > 0) {
+      Thread.sleep(sleepTime);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java
new file mode 100644
index 0000000..9652a5a
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.List;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+* Action that tries to move a random region of a table.
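+* The chosen region is unassigned; the master then re-assigns it, possibly to a
+* different region server.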
+*/
+public class MoveRandomRegionOfTableAction extends Action {
+  private final long sleepTime;
+  private final byte[] tableNameBytes;
+  private final String tableName;
+
+  public MoveRandomRegionOfTableAction(String tableName) {
+    this(-1, tableName);
+  }
+
+  public MoveRandomRegionOfTableAction(long sleepTime, String tableName) {
+    this.sleepTime = sleepTime;
+    this.tableNameBytes = Bytes.toBytes(tableName);
+    this.tableName = tableName;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
+    HBaseAdmin admin = util.getHBaseAdmin();
+
+    LOG.info("Performing action: Move random region of table " + tableName);
+    List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+    HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
+      regions.toArray(new HRegionInfo[regions.size()]));
+    LOG.debug("Unassigning region " + region.getRegionNameAsString());
+    admin.unassign(region.getRegionName(), false);
+    if (sleepTime > 0) {
+      Thread.sleep(sleepTime);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java
new file mode 100644
index 0000000..8f21a0b
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+* Action that tries to move every region of a table.
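+* Each region is moved to a randomly chosen live server; a failure to move one
+* region is logged and does not abort the rest of the action.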
+*/
+public class MoveRegionsOfTableAction extends Action {
+  private final long sleepTime;
+  private final byte[] tableNameBytes;
+  private final String tableName;
+
+  public MoveRegionsOfTableAction(String tableName) {
+    this(-1, tableName);
+  }
+
+  public MoveRegionsOfTableAction(long sleepTime, String tableName) {
+    this.sleepTime = sleepTime;
+    this.tableNameBytes = Bytes.toBytes(tableName);
+    this.tableName = tableName;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
+
+    List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+    Collection<ServerName> serversList = admin.getClusterStatus().getServers();
+    ServerName[] servers = serversList.toArray(new ServerName[serversList.size()]);
+
+    LOG.info("Performing action: Move regions of table " + tableName);
+    for (HRegionInfo regionInfo : regions) {
+      try {
+        String destServerName =
+          servers[RandomUtils.nextInt(servers.length)].getServerName();
+        LOG.debug("Moving " + regionInfo.getRegionNameAsString() + " to " + destServerName);
+        admin.move(regionInfo.getRegionName(), Bytes.toBytes(destServerName));
+      } catch (Exception e) {
+        LOG.debug("Error moving region", e);
+      }
+    }
+    if (sleepTime > 0) {
+      Thread.sleep(sleepTime);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java
new file mode 100644
index 0000000..17e0bd6
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Action that removes a column family.
+ */
+public class RemoveColumnAction extends Action {
+  private final byte[] tableName;
+  private final Set<String> protectedColumns;
+  private HBaseAdmin admin;
+  private Random random;
+
+  public RemoveColumnAction(String tableName, Set<String> protectedColumns) {
+    this.tableName = Bytes.toBytes(tableName);
+    this.protectedColumns = protectedColumns;
+    random = new Random();
+  }
+
+  @Override
+  public void init(ActionContext context) throws IOException {
+    super.init(context);
+    this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin();
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
+    HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies();
+
+    if (columnDescriptors.length <= 1) {
+      return;
+    }
+
+    int index = random.nextInt(columnDescriptors.length);
+    while (protectedColumns.contains(columnDescriptors[index].getNameAsString())) {
+      index = random.nextInt(columnDescriptors.length);
+    }
+
+    tableDescriptor.removeFamily(columnDescriptors[index].getName());
+
+    admin.modifyTable(tableName, tableDescriptor);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java
new file mode 100644
index 0000000..8795352
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.util.Threads;
+
+/**
+* Base class for actions that restart HBase servers.
+*/
+public class RestartActionBaseAction extends Action {
+  long sleepTime; // how long should we sleep
+
+  public RestartActionBaseAction(long sleepTime) {
+    this.sleepTime = sleepTime;
+  }
+
+  void sleep(long sleepTime) {
+    LOG.info("Sleeping for: " + sleepTime);
+    Threads.sleep(sleepTime);
+  }
+
+  void restartMaster(ServerName server, long sleepTime) throws IOException {
+    sleepTime = Math.max(sleepTime, 1000);
+    killMaster(server);
+    sleep(sleepTime);
+    startMaster(server);
+  }
+
+  void restartRs(ServerName server, long sleepTime) throws IOException {
+    sleepTime = Math.max(sleepTime, 1000);
+    killRs(server);
+    sleep(sleepTime);
+    startRs(server);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java
new file mode 100644
index 0000000..a9bc23a
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.ServerName;
+
+/**
+* Action that tries to restart the active master.
+*/
+public class RestartActiveMasterAction extends RestartActionBaseAction {
+  public RestartActiveMasterAction(long sleepTime) {
+    super(sleepTime);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Performing action: Restart active master");
+
+    ServerName master = cluster.getClusterStatus().getMaster();
+    restartMaster(master, sleepTime);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java
new file mode 100644
index 0000000..18cdf4a
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+
+/**
+* Action that restarts a random HRegionServer.
+*/
+public class RestartRandomRsAction extends RestartActionBaseAction {
+  public RestartRandomRsAction(long sleepTime) {
+    super(sleepTime);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Performing action: Restart random region server");
+    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
+
+    restartRs(server, sleepTime);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java
new file mode 100644
index 0000000..10d5e14
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.ServerName;
+
+/**
+* Action that tries to restart the HRegionServer holding Meta.
+*/
+public class RestartRsHoldingMetaAction extends RestartActionBaseAction {
+  public RestartRsHoldingMetaAction(long sleepTime) {
+    super(sleepTime);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Performing action: Restart region server holding META");
+    ServerName server = cluster.getServerHoldingMeta();
+    if (server == null) {
+      LOG.warn("No server is holding .META. right now.");
+      return;
+    }
+    restartRs(server, sleepTime);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java
new file mode 100644
index 0000000..790d0c6
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.HTable;
+
+/**
+* Action that restarts an HRegionServer holding one of the regions of the table.
+*/
+public class RestartRsHoldingTableAction extends RestartActionBaseAction {
+
+  private final String tableName;
+
+  public RestartRsHoldingTableAction(long sleepTime, String tableName) {
+    super(sleepTime);
+    this.tableName = tableName;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HTable table = null;
+    try {
+      Configuration conf = context.getHaseIntegrationTestingUtility().getConfiguration();
+      table = new HTable(conf, tableName);
+    } catch (IOException e) {
+      LOG.debug("Error creating HTable used to get list of region locations.", e);
+      return;
+    }
+
+    Collection<ServerName> serverNames = table.getRegionLocations().values();
+    ServerName[] nameArray = serverNames.toArray(new ServerName[serverNames.size()]);
+
+    restartRs(nameArray[RandomUtils.nextInt(nameArray.length)], sleepTime);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
new file mode 100644
index 0000000..4b6ccfb
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+
+/**
+ * Restarts a ratio of the regionservers in a rolling fashion. At each step, either kills a
+ * server or starts one, sleeping randomly (0-sleepTime) in between steps.
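+ * All of the selected servers are eventually restarted, but several of them may be
+ * down at the same time at some point during the action.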
+ */
+public class RollingBatchRestartRsAction extends BatchRestartRsAction {
+  public RollingBatchRestartRsAction(long sleepTime, float ratio) {
+    super(sleepTime, ratio);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
+        (int)(ratio * 100)));
+    List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
+        ratio);
+
+    Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
+    Queue<ServerName> deadServers = new LinkedList<ServerName>();
+
+    // loop while there are servers left to kill or dead servers left to restart
+    while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) {
+      boolean action = true; // action: true = kill a server, false = start a server
+
+      if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) {
+        action = deadServers.isEmpty();
+      } else {
+        action = RandomUtils.nextBoolean();
+      }
+
+      if (action) {
+        ServerName server = serversToBeKilled.remove();
+        killRs(server);
+        deadServers.add(server);
+      } else {
+        ServerName server = deadServers.remove();
+        startRs(server);
+      }
+
+      sleep(RandomUtils.nextInt((int)sleepTime));
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java
new file mode 100644
index 0000000..3071bd6
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+
+/**
+* Action that tries to take a snapshot of a table.
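+* The snapshot name combines the table name and the current time in milliseconds,
+* so repeated runs create distinct snapshots.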
+*/ +public class SnapshotTableAction extends Action { + private final String tableName; + private final long sleepTime; + + public SnapshotTableAction(String tableName) { + this(-1, tableName); + } + + public SnapshotTableAction(int sleepTime, String tableName) { + this.tableName = tableName; + this.sleepTime = sleepTime; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + String snapshotName = tableName + "-it-" + System.currentTimeMillis(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Snapshot table " + tableName); + admin.snapshot(snapshotName, tableName); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java new file mode 100644 index 0000000..116a447 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import java.util.List; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** +* Action that tries to split a random region of a table. 
+*/
+public class SplitRandomRegionOfTableAction extends Action {
+  private final byte[] tableNameBytes;
+  private final long sleepTime;
+  private final String tableName;
+
+  public SplitRandomRegionOfTableAction(String tableName) {
+    this(-1, tableName);
+  }
+
+  public SplitRandomRegionOfTableAction(int sleepTime, String tableName) {
+    this.tableNameBytes = Bytes.toBytes(tableName);
+    this.sleepTime = sleepTime;
+    this.tableName = tableName;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
+    HBaseAdmin admin = util.getHBaseAdmin();
+
+    LOG.info("Performing action: Split random region of table " + tableName);
+    List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
+    HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
+      regions.toArray(new HRegionInfo[regions.size()]));
+    LOG.debug("Splitting region " + region.getRegionNameAsString());
+    admin.split(region.getRegionName());
+    if (sleepTime > 0) {
+      Thread.sleep(sleepTime);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java
new file mode 100644
index 0000000..ca8af79
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java
@@ -0,0 +1,49 @@
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.ServerName;
+import org.junit.Assert;
+
+/** This action is too specific to put in ChaosMonkey; put it here */
+public class UnbalanceKillAndRebalanceAction extends Action {
+  /** Fractions of servers that will hoard regions and then either live or die,
+   * respectively; from all the other servers, HOARD_FRC_OF_REGIONS of the regions
+   * are moved to the hoarding servers at random */
+  private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1;
+  private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1;
+  private static final double HOARD_FRC_OF_REGIONS = 0.8;
+  /** Waits between calling unbalance and killing servers, between the kills and the
+   * rebalance, and between the rebalance and restarting the servers, to make sure these
+   * events have time to impact the cluster.
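+   * The waits are two, two, and five seconds respectively; see the constants below.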
+   */
+  private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000;
+  private static final long WAIT_FOR_KILLS_MS = 2 * 1000;
+  private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000;
+
+  @Override
+  public void perform() throws Exception {
+    ClusterStatus status = this.cluster.getClusterStatus();
+    List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
+    int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size());
+    int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size());
+    Assert.assertTrue((liveCount + deadCount) < victimServers.size());
+    List<ServerName> targetServers = new ArrayList<ServerName>(liveCount);
+    for (int i = 0; i < liveCount + deadCount; ++i) {
+      int victimIx = RandomUtils.nextInt(victimServers.size());
+      targetServers.add(victimServers.remove(victimIx));
+    }
+    unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS);
+    Thread.sleep(WAIT_FOR_UNBALANCE_MS);
+    for (int i = 0; i < liveCount; ++i) {
+      killRs(targetServers.get(i));
+    }
+    Thread.sleep(WAIT_FOR_KILLS_MS);
+    forceBalancer();
+    Thread.sleep(WAIT_AFTER_BALANCE_MS);
+    for (int i = 0; i < liveCount; ++i) {
+      startRs(targetServers.get(i));
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java
new file mode 100644
index 0000000..2779bd1
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.ServerName;
+
+/**
+* Action that tries to unbalance the regions of a cluster.
+*/
+public class UnbalanceRegionsAction extends Action {
+  private double fractionOfRegions;
+  private double fractionOfServers;
+
+  /**
+   * Unbalances the regions on the cluster by choosing "target" servers, and moving
+   * some regions from each of the non-target servers to random target servers.
+   * @param fractionOfRegions Fraction of regions to move from each server.
+   * @param fractionOfServers Fraction of servers to be chosen as targets.
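+   * (For example, 0.5 and 0.1 move half of the regions from each non-target server
+   * onto a randomly chosen tenth of the servers.)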
+   */
+  public UnbalanceRegionsAction(double fractionOfRegions, double fractionOfServers) {
+    this.fractionOfRegions = fractionOfRegions;
+    this.fractionOfServers = fractionOfServers;
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Unbalancing regions");
+    ClusterStatus status = this.cluster.getClusterStatus();
+    List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
+    int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
+    List<ServerName> targetServers = new ArrayList<ServerName>(targetServerCount);
+    for (int i = 0; i < targetServerCount; ++i) {
+      int victimIx = RandomUtils.nextInt(victimServers.size());
+      targetServers.add(victimServers.remove(victimIx));
+    }
+    unbalanceRegions(status, victimServers, targetServers, fractionOfRegions);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java
new file mode 100644
index 0000000..e109585
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.factories;
+
+import org.apache.hadoop.hbase.chaos.monkies.CalmChaosMonkey;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+
+
+/**
+ * Factory to create a calm ChaosMonkey.
+ */
+public class CalmMonkeyFactory extends MonkeyFactory {
+  @Override
+  public ChaosMonkey build() {
+    return new CalmChaosMonkey();
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
new file mode 100644
index 0000000..4488b86
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.factories;
+
+import java.util.Map;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.hbase.IntegrationTestingUtility;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+
+/**
+ * Base class of the factory that will create a ChaosMonkey.
+ */
+public abstract class MonkeyFactory {
+
+  protected String tableName;
+  protected Set<String> columnFamilies;
+  protected IntegrationTestingUtility util;
+
+  public MonkeyFactory setTableName(String tableName) {
+    this.tableName = tableName;
+    return this;
+  }
+
+  public MonkeyFactory setColumnFamilies(Set<String> columnFamilies) {
+    this.columnFamilies = columnFamilies;
+    return this;
+  }
+
+  public MonkeyFactory setUtil(IntegrationTestingUtility util) {
+    this.util = util;
+    return this;
+  }
+
+  public abstract ChaosMonkey build();
+
+
+  public static final String CALM = "calm";
+  public static final String SLOW_DETERMINISTIC = "slowDeterministic";
+  public static final String UNBALANCE = "unbalance";
+
+  public static Map<String, MonkeyFactory> FACTORIES =
+    ImmutableMap.<String, MonkeyFactory>builder()
+      .put(CALM, new CalmMonkeyFactory())
+      .put(SLOW_DETERMINISTIC, new SlowDeterministicMonkeyFactory())
+      .put(UNBALANCE, new UnbalanceMonkeyFactory())
+      .build();
+
+  public static MonkeyFactory getFactory(String factoryName) {
+    MonkeyFactory fact = FACTORIES.get(factoryName);
+    if (fact == null) {
+      fact = FACTORIES.get(CALM);
+    }
+    return fact;
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
new file mode 100644
index 0000000..af478cf
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.factories;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.chaos.actions.AddColumnAction;
+import org.apache.hadoop.hbase.chaos.actions.BatchRestartRsAction;
+import org.apache.hadoop.hbase.chaos.actions.ChangeEncodingAction;
+import org.apache.hadoop.hbase.chaos.actions.ChangeVersionsAction;
+import org.apache.hadoop.hbase.chaos.actions.CompactRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.CompactTableAction;
+import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.FlushTableAction;
+import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction;
+import org.apache.hadoop.hbase.chaos.actions.RemoveColumnAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction;
+import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction;
+import org.apache.hadoop.hbase.chaos.actions.SnapshotTableAction;
+import org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
+import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
+import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
+
+public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
+  @Override
+  public ChaosMonkey build() {
+
+    // Actions such as compact/flush a table/region or move one region around.
+    // They are not so destructive and can be executed more frequently.
+    Action[] actions1 = new Action[] {
+        new CompactTableAction(tableName, 0.5f),
+        new CompactRandomRegionOfTableAction(tableName, 0.6f),
+        new FlushTableAction(tableName),
+        new FlushRandomRegionOfTableAction(tableName),
+        new MoveRandomRegionOfTableAction(tableName)
+    };
+
+    // Actions such as split/merge/snapshot.
+    // They should not cause data loss, or unreliability such as a region
+    // stuck in transition.
+    Action[] actions2 = new Action[] {
+        new SplitRandomRegionOfTableAction(tableName),
+        new MergeRandomAdjacentRegionsOfTableAction(tableName),
+        new SnapshotTableAction(tableName),
+        new AddColumnAction(tableName),
+        new RemoveColumnAction(tableName, columnFamilies),
+        new ChangeEncodingAction(tableName),
+        new ChangeVersionsAction(tableName)
+    };
+
+    // Destructive actions to mess things around.
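+    // Each destructive action below is first run once, in order, by a
+    // DoActionsOncePolicy, after which a periodic random policy over the same
+    // actions takes over (see the CompositeSequentialPolicy built below).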
+ Action[] actions3 = new Action[] { + new MoveRegionsOfTableAction(tableName), + new RestartRandomRsAction(60000), + new BatchRestartRsAction(5000, 0.5f), + new RestartActiveMasterAction(5000), + new RollingBatchRestartRsAction(5000, 1.0f), + new RestartRsHoldingMetaAction(35000) + }; + + return new PolicyBasedChaosMonkey(util, + new PeriodicRandomActionPolicy(60 * 1000, actions1), + new PeriodicRandomActionPolicy(90 * 1000, actions2), + new CompositeSequentialPolicy( + new DoActionsOncePolicy(150 * 1000, actions3), + new PeriodicRandomActionPolicy(150 * 1000, actions3))); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java new file mode 100644 index 0000000..2d2d315 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.factories; + +import org.apache.hadoop.hbase.chaos.actions.UnbalanceKillAndRebalanceAction; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; +import org.apache.hadoop.hbase.chaos.policies.Policy; + +public class UnbalanceMonkeyFactory extends MonkeyFactory { + /** How often to introduce the chaos. If too frequent, sequence of kills on minicluster + * can cause test to fail when Put runs out of retries. */ + private static final long CHAOS_EVERY_MS = 65 * 1000; + + @Override + public ChaosMonkey build() { + Policy chaosPolicy = new PeriodicRandomActionPolicy( + CHAOS_EVERY_MS, + new UnbalanceKillAndRebalanceAction() + ); + + return new PolicyBasedChaosMonkey(util, chaosPolicy); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java new file mode 100644 index 0000000..8aa858a --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java @@ -0,0 +1,27 @@ +package org.apache.hadoop.hbase.chaos.monkies; + + +/** + * Chaos Monkey that does nothing. 
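+ * It is the fallback returned by MonkeyFactory.getFactory() when an unknown monkey
+ * name is requested, so such runs proceed with chaos disabled.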
+ */
+public class CalmChaosMonkey extends ChaosMonkey {
+  @Override
+  public void start() throws Exception {
+
+  }
+
+  @Override
+  public void stop(String why) {
+
+  }
+
+  @Override
+  public boolean isStopped() {
+    return false;
+  }
+
+  @Override
+  public void waitForStop() throws InterruptedException {
+
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java
new file mode 100644
index 0000000..52bd7d2
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java
@@ -0,0 +1,15 @@
+package org.apache.hadoop.hbase.chaos.monkies;
+
+import org.apache.hadoop.hbase.Stoppable;
+
+public abstract class ChaosMonkey implements Stoppable {
+  public abstract void start() throws Exception;
+
+  @Override
+  public abstract void stop(String why);
+
+  @Override
+  public abstract boolean isStopped();
+
+  public abstract void waitForStop() throws InterruptedException;
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java
new file mode 100644
index 0000000..d4528ef
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.monkies;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.IntegrationTestingUtility;
+import org.apache.hadoop.hbase.chaos.policies.Policy;
+import org.apache.hadoop.hbase.util.Pair;
+
+/**
+ * A utility that injects faults in a running cluster.
+ * <p>

+ * ChaosMonkey defines Actions and Policies. Actions are sequences of events, like
+ *  - Select a random server to kill
+ *  - Sleep for 5 sec
+ *  - Start the server on the same host
+ * Actions can also be complex events, like a rolling restart of all of the servers.
+ * <p>
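To make the Action contract concrete, a custom action under this scheme boils down to overriding perform(). The sketch below is hypothetical: the class name and the pause length are invented, and it assumes the new chaos Action base class keeps the getCurrentServers()/killRs()/startRs() helpers of the old ChaosMonkey.Action it replaces (shown near the end of this patch).

    package org.apache.hadoop.hbase.chaos.actions;

    import org.apache.hadoop.hbase.ServerName;
    import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;

    // Hypothetical action: kill a random region server, pause, then restart it.
    public class KillAndRestartRandomRsAction extends Action {
      private final long sleepTime; // how long the server stays down, in ms

      public KillAndRestartRandomRsAction(long sleepTime) {
        this.sleepTime = sleepTime;
      }

      @Override
      public void perform() throws Exception {
        // selectRandomItem is the public helper on PolicyBasedChaosMonkey below.
        ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
        killRs(server);          // assumed to carry over from the old ChaosMonkey.Action
        Thread.sleep(sleepTime); // leave the server down for a while
        startRs(server);
      }
    }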
+ * Policies, on the other hand, are responsible for executing the actions based on a strategy.
+ * The default policy is to execute a random action every minute based on predefined action
+ * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
+ * policy can be active at any time.
+ * <p>
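As a sketch of how weights feed that default policy, the PeriodicRandomActionPolicy added later in this patch accepts (action, weight) pairs. The table name and the 6/3/1 split here are purely illustrative, and the single-string action constructors are an assumption carried over from the old FlushTable and MoveRandomRegionOfTable actions being deleted below.

    import java.util.Arrays;

    import org.apache.hadoop.hbase.chaos.actions.Action;
    import org.apache.hadoop.hbase.chaos.actions.FlushTableAction;
    import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction;
    import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction;
    import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
    import org.apache.hadoop.hbase.chaos.policies.Policy;
    import org.apache.hadoop.hbase.util.Pair;

    public class WeightedPolicySketch {
      // Once a minute on average: a flush is picked six times out of ten,
      // a region move three times, and a region server restart once.
      public static Policy weightedPolicy(String tableName) {
        return new PeriodicRandomActionPolicy(60 * 1000, Arrays.asList(
            new Pair<Action, Integer>(new FlushTableAction(tableName), 6),
            new Pair<Action, Integer>(new MoveRandomRegionOfTableAction(tableName), 3),
            new Pair<Action, Integer>(new RestartRandomRsAction(60000), 1)));
      }
    }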
+ * Chaos monkey can be run from the command line, or invoked from integration tests.
+ * See {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use
+ * chaos monkey for code examples.
+ * <p>
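For instance, a test could wrap its workload with a monkey built directly from these pieces; a minimal sketch, using only API visible in this patch, except for the workload itself, which is left as a placeholder:

    import org.apache.hadoop.hbase.IntegrationTestingUtility;
    import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction;
    import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
    import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
    import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;

    public class ChaosDuringTestSketch {
      public void runWithChaos(IntegrationTestingUtility util) throws Exception {
        // Restart a random region server roughly once a minute while the test runs.
        ChaosMonkey monkey = new PolicyBasedChaosMonkey(util,
            new PeriodicRandomActionPolicy(60 * 1000, new RestartRandomRsAction(60000)));
        monkey.start();
        try {
          // ... run the actual ingest/verify workload here (placeholder) ...
        } finally {
          monkey.stop("test has finished, that's why");
          monkey.waitForStop();
        }
      }
    }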
+ * The ChaosMonkey class is inspired by Netflix's same-named tool:
+ * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html
+ */
+public class PolicyBasedChaosMonkey extends ChaosMonkey {
+
+  private static final Log LOG = LogFactory.getLog(PolicyBasedChaosMonkey.class);
+  private static final long ONE_SEC = 1000;
+  private static final long FIVE_SEC = 5 * ONE_SEC;
+  private static final long ONE_MIN = 60 * ONE_SEC;
+
+  public static final long TIMEOUT = ONE_MIN;
+
+  final IntegrationTestingUtility util;
+
+  /**
+   * Construct a new ChaosMonkey
+   * @param util the HBaseIntegrationTestingUtility already configured
+   * @param policies custom policies to use
+   */
+  public PolicyBasedChaosMonkey(IntegrationTestingUtility util, Policy... policies) {
+    this.util = util;
+    this.policies = policies;
+  }
+
+  public PolicyBasedChaosMonkey(IntegrationTestingUtility util, Collection<Policy> policies) {
+    this.util = util;
+    this.policies = policies.toArray(new Policy[policies.size()]);
+  }
+
+
+  /** Selects a random item from the given items */
+  public static <T> T selectRandomItem(T[] items) {
+    return items[RandomUtils.nextInt(items.length)];
+  }
+
+  /** Selects a random item from the given items with weights */
+  public static <T> T selectWeightedRandomItem(List<Pair<T, Integer>> items) {
+    int totalWeight = 0;
+    for (Pair<T, Integer> pair : items) {
+      totalWeight += pair.getSecond();
+    }
+
+    int cutoff = RandomUtils.nextInt(totalWeight);
+    int cumulative = 0;
+    T item = null;
+
+    // warn: O(n)
+    for (int i = 0; i < items.size(); i++) {
+      int curWeight = items.get(i).getSecond();
+      if (cutoff < cumulative + curWeight) {
+        item = items.get(i).getFirst();
+        break;
+      }
+      cumulative += curWeight;
+    }
+
+    return item;
+  }
+
+  /** Selects and returns ceil(ratio * items.length) random items from the given array */
+  public static <T> List<T> selectRandomItems(T[] items, float ratio) {
+    int remaining = (int)Math.ceil(items.length * ratio);
+
+    List<T> selectedItems = new ArrayList<T>(remaining);
+
+    for (int i = 0; i < items.length && remaining > 0; i++) {
+      if (RandomUtils.nextFloat() < ((float)remaining / (items.length - i))) {
+        selectedItems.add(items[i]);
+        remaining--;
+      }
+    }
+
+    return selectedItems;
+  }
+
+  private Policy[] policies;
+  private Thread[] monkeyThreads;
+
+  @Override
+  public void start() throws Exception {
+    monkeyThreads = new Thread[policies.length];
+
+    for (int i = 0; i < policies.length; i++) {
+      policies[i].init(new Policy.PolicyContext(this.util));
+      Thread monkeyThread = new Thread(policies[i]);
+      monkeyThread.start();
+      monkeyThreads[i] = monkeyThread;
+    }
+  }
+
+  @Override
+  public void stop(String why) {
+    for (Policy policy : policies) {
+      policy.stop(why);
+    }
+  }
+
+  @Override
+  public boolean isStopped() {
+    return policies[0].isStopped();
+  }
+
+  /**
+   * Wait for ChaosMonkey to stop.
+   * @throws InterruptedException
+   */
+  @Override
+  public void waitForStop() throws InterruptedException {
+    for (Thread monkeyThread : monkeyThreads) {
+      monkeyThread.join();
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/CompositeSequentialPolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/CompositeSequentialPolicy.java
new file mode 100644
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/CompositeSequentialPolicy.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.policies;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** A policy that runs multiple other policies one after the other */
+public class CompositeSequentialPolicy extends Policy {
+  private List<Policy> policies;
+  public CompositeSequentialPolicy(Policy... policies) {
+    this.policies = Arrays.asList(policies);
+  }
+
+  @Override
+  public void stop(String why) {
+    super.stop(why);
+    for (Policy p : policies) {
+      p.stop(why);
+    }
+  }
+
+  @Override
+  public void run() {
+    for (Policy p : policies) {
+      p.run();
+    }
+  }
+
+  @Override
+  public void init(PolicyContext context) throws Exception {
+    super.init(context);
+    for (Policy p : policies) {
+      p.init(context);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java
new file mode 100644
index 0000000..e03816a
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.policies;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.util.StringUtils;
+
+/** A policy which performs a sequence of actions deterministically. */
+public class DoActionsOncePolicy extends PeriodicPolicy {
+  private List<Action> actions;
+
+  public DoActionsOncePolicy(long periodMs, List<Action> actions) {
+    super(periodMs);
+    this.actions = new ArrayList<Action>(actions);
+  }
+
+  public DoActionsOncePolicy(long periodMs, Action... actions) {
+    this(periodMs, Arrays.asList(actions));
+  }
+
+  @Override
+  protected void runOneIteration() {
+    if (actions.isEmpty()) {
+      this.stop("done");
+      return;
+    }
+    Action action = actions.remove(0);
+
+    try {
+      action.perform();
+    } catch (Exception ex) {
+      LOG.warn("Exception occurred while performing action: "
+          + StringUtils.stringifyException(ex));
+    }
+  }
+
+  @Override
+  public void init(PolicyContext context) throws Exception {
+    super.init(context);
+    for (Action action : actions) {
+      action.init(this.context);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java
new file mode 100644
index 0000000..b7b6109
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.policies;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.hadoop.hbase.util.Threads;
+
+/** A policy which runs one iteration every configured time interval. */
+public abstract class PeriodicPolicy extends Policy {
+  private long periodMs;
+
+  public PeriodicPolicy(long periodMs) {
+    this.periodMs = periodMs;
+  }
+
+  @Override
+  public void run() {
+    // Add some jitter.
+    int jitter = RandomUtils.nextInt((int) periodMs);
+    LOG.info("Sleeping for " + jitter + " ms to add jitter");
+    Threads.sleep(jitter);
+
+    while (!isStopped()) {
+      long start = System.currentTimeMillis();
+      runOneIteration();
+
+      if (isStopped()) return;
+      long sleepTime = periodMs - (System.currentTimeMillis() - start);
+      if (sleepTime > 0) {
+        LOG.info("Sleeping for: " + sleepTime + " ms");
+        Threads.sleep(sleepTime);
+      }
+    }
+  }
+
+  protected abstract void runOneIteration();
+
+  @Override
+  public void init(PolicyContext context) throws Exception {
+    super.init(context);
+    LOG.info("Using ChaosMonkey Policy: " + this.getClass() + ", period: " + periodMs);
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java
new file mode 100644
index 0000000..8912467
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.policies;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * A policy that picks a random action according to the given weights,
+ * and performs it every configurable period.
+ */
+public class PeriodicRandomActionPolicy extends PeriodicPolicy {
+  private List<Pair<Action, Integer>> actions;
+
+  public PeriodicRandomActionPolicy(long periodMs, List<Pair<Action, Integer>> actions) {
+    super(periodMs);
+    this.actions = actions;
+  }
+
+  public PeriodicRandomActionPolicy(long periodMs, Pair<Action, Integer>... actions) {
+    // We don't expect it to be modified.
+    this(periodMs, Arrays.asList(actions));
+  }
+
+  public PeriodicRandomActionPolicy(long periodMs, Action... actions) {
+    super(periodMs);
+    this.actions = new ArrayList<Pair<Action, Integer>>(actions.length);
+    for (Action action : actions) {
+      this.actions.add(new Pair<Action, Integer>(action, 1));
+    }
+  }
+
+  @Override
+  protected void runOneIteration() {
+    Action action = PolicyBasedChaosMonkey.selectWeightedRandomItem(actions);
+    try {
+      action.perform();
+    } catch (Exception ex) {
+      LOG.warn("Exception occurred while performing action: "
+          + StringUtils.stringifyException(ex));
+    }
+  }
+
+  @Override
+  public void init(PolicyContext context) throws Exception {
+    super.init(context);
+    for (Pair<Action, Integer> action : actions) {
+      action.getFirst().init(this.context);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java
new file mode 100644
index 0000000..affaeda
--- /dev/null
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.policies;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.IntegrationTestingUtility;
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.util.StoppableImplementation;
+
+/**
+ * A policy to introduce chaos to the cluster
+ */
+public abstract class Policy extends StoppableImplementation implements Runnable {
+
+  protected static Log LOG = LogFactory.getLog(Policy.class);
+
+  protected PolicyContext context;
+
+  public void init(PolicyContext context) throws Exception {
+    this.context = context;
+  }
+
+  /**
+   * A context for a Policy
+   */
+  public static class PolicyContext extends Action.ActionContext {
+    public PolicyContext(IntegrationTestingUtility util) {
+      super(util);
+    }
+  }
+}
diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestBulkLoad.java hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestBulkLoad.java
index 7120fe6..239f9aa 100644
--- hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestBulkLoad.java
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestBulkLoad.java
@@ -18,11 +18,20 @@
  */
 package org.apache.hadoop.hbase.mapreduce;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
 import org.apache.commons.lang.RandomStringUtils;
-import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.IntegrationTests; import org.apache.hadoop.hbase.KeyValue; @@ -31,7 +40,6 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.EnvironmentEdge; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.io.LongWritable; @@ -51,22 +59,12 @@ import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.junit.After; +import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.UUID; - import static org.junit.Assert.assertEquals; /** @@ -101,7 +99,7 @@ import static org.junit.Assert.assertEquals; * */ @Category(IntegrationTests.class) -public class IntegrationTestBulkLoad implements Configurable, Tool { +public class IntegrationTestBulkLoad extends IntegrationTestBase { private static byte[] CHAIN_FAM = Bytes.toBytes("L"); private static byte[] SORT_FAM = Bytes.toBytes("S"); @@ -120,11 +118,6 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { private static String TABLE_NAME_KEY = "hbase.IntegrationTestBulkLoad.tableName"; private static String TABLE_NAME = "IntegrationTestBulkLoad"; - private static IntegrationTestingUtility util; - - private String tableName; - private byte[] tableNameBytes; - @Test public void testBulkLoad() throws Exception { setupTable(); @@ -143,14 +136,12 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { } private void setupTable() throws IOException { - tableName = getConf().get(TABLE_NAME_KEY, TABLE_NAME); - tableNameBytes = Bytes.toBytes(tableName); - if (util.getHBaseAdmin().tableExists(tableNameBytes)) { - util.deleteTable(tableNameBytes); + if (util.getHBaseAdmin().tableExists(getTablename())) { + util.deleteTable(getTablename()); } util.createTable( - tableNameBytes, + Bytes.toBytes(getTablename()), new byte[][]{CHAIN_FAM, SORT_FAM, DATA_FAM}, getSplits(16) ); @@ -160,8 +151,8 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { String jobName = IntegrationTestBulkLoad.class.getSimpleName() + " - " + EnvironmentEdgeManager.currentTimeMillis(); Configuration conf = new Configuration(util.getConfiguration()); - Path p = util.getDataTestDirOnTestFS(tableName + "-" + iteration); - HTable table = new HTable(conf, tableName); + Path p = util.getDataTestDirOnTestFS(getTablename() + "-" + iteration); + HTable table = new HTable(conf, getTablename()); conf.setBoolean("mapreduce.map.speculative", false); conf.setBoolean("mapreduce.reduce.speculative", false); @@ -532,7 +523,7 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { */ private void runCheck() throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = getConf(); - String jobName = tableName + "_check" + 
EnvironmentEdgeManager.currentTimeMillis(); + String jobName = getTablename() + "_check" + EnvironmentEdgeManager.currentTimeMillis(); Path p = util.getDataTestDirOnTestFS(jobName); Job job = new Job(conf); @@ -551,7 +542,7 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { s.setBatch(100); TableMapReduceUtil.initTableMapperJob( - Bytes.toBytes(tableName), + Bytes.toBytes(getTablename()), new Scan(), LinkedListCheckingMapper.class, LinkKey.class, @@ -571,12 +562,10 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { util.getTestFileSystem().delete(p, true); } - @BeforeClass - public static void provisionCluster() throws Exception { - if (null == util) { - util = new IntegrationTestingUtility(); - } - + @Before + @Override + public void setUp() throws Exception { + util = getTestingUtil(getConf()); util.initializeCluster(1); // Scale this up on a real cluster @@ -590,30 +579,27 @@ public class IntegrationTestBulkLoad implements Configurable, Tool { } } - @AfterClass - public static void releaseCluster() throws Exception { + @After + @Override + public void cleanUp() throws Exception { util.restoreCluster(); util = null; } @Override - public int run(String[] args) throws Exception { - provisionCluster(); - testBulkLoad(); - releaseCluster(); + public int runTestFromCommandLine() throws Exception { + runCheck(); return 0; } - public void setConf(Configuration conf) { - if (util != null) { - throw new IllegalArgumentException("setConf not supported after the cluster has been started."); - } - util = new IntegrationTestingUtility(conf); + @Override + public String getTablename() { + return getConf().get(TABLE_NAME_KEY, TABLE_NAME); } @Override - public Configuration getConf() { - return util.getConfiguration(); + protected Set getColumnFamilies() { + return null; } public static void main(String[] args) throws Exception { diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java index 248aa85..8542840 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestImportTsv.java @@ -17,11 +17,6 @@ */ package org.apache.hadoop.hbase.mapreduce; -import static java.lang.String.format; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - import java.io.IOException; import java.util.Arrays; import java.util.Iterator; @@ -67,6 +62,11 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; +import static java.lang.String.format; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + /** * Validate ImportTsv + LoadIncrementalHFiles on a distributed cluster. 
*/ diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java index a4feca5..bb2dae3 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java @@ -18,6 +18,14 @@ package org.apache.hadoop.hbase.mttr; +import java.io.IOException; +import java.util.ArrayList; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + import com.google.common.base.Objects; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.logging.Log; @@ -29,6 +37,11 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.IntegrationTests; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction; +import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction; +import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; @@ -37,7 +50,6 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.KeyOnlyFilter; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ChaosMonkey; import org.apache.hadoop.hbase.util.LoadTestTool; import org.cloudera.htrace.Sampler; import org.cloudera.htrace.Span; @@ -47,14 +59,6 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.io.IOException; -import java.util.ArrayList; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - import static junit.framework.Assert.assertEquals; /** @@ -124,10 +128,10 @@ public class IntegrationTestMTTR { /** * All of the chaos monkey actions used. */ - private static ChaosMonkey.Action restartRSAction; - private static ChaosMonkey.Action restartMetaAction; - private static ChaosMonkey.Action moveRegionAction; - private static ChaosMonkey.Action restartMasterAction; + private static Action restartRSAction; + private static Action restartMetaAction; + private static Action moveRegionAction; + private static Action restartMasterAction; /** * The load test tool used to create load and make sure that HLogs aren't empty. @@ -163,19 +167,19 @@ public class IntegrationTestMTTR { private static void setupActions() throws IOException { // Set up the action that will restart a region server holding a region from our table // because this table should only have one region we should be good. - restartRSAction = new ChaosMonkey.RestartRsHoldingTable(SLEEP_TIME, tableName.getNameAsString()); + restartRSAction = new RestartRsHoldingTableAction(SLEEP_TIME, tableName.getNameAsString()); // Set up the action that will kill the region holding meta. 
- restartMetaAction = new ChaosMonkey.RestartRsHoldingMeta(SLEEP_TIME); + restartMetaAction = new RestartRsHoldingMetaAction(SLEEP_TIME); // Set up the action that will move the regions of our table. - moveRegionAction = new ChaosMonkey.MoveRegionsOfTable(SLEEP_TIME, tableName.getNameAsString()); + moveRegionAction = new MoveRegionsOfTableAction(SLEEP_TIME, tableName.getNameAsString()); // Kill the master - restartMasterAction = new ChaosMonkey.RestartActiveMaster(1000); + restartMasterAction = new RestartActiveMasterAction(1000); // Give the action the access to the cluster. - ChaosMonkey.ActionContext actionContext = new ChaosMonkey.ActionContext(util); + Action.ActionContext actionContext = new Action.ActionContext(util); restartRSAction.init(actionContext); restartMetaAction.init(actionContext); moveRegionAction.init(actionContext); @@ -235,7 +239,7 @@ public class IntegrationTestMTTR { @Test public void testRestartRsHoldingTable() throws Exception { - run(new ActionCallable(restartRSAction), "RestartRsHoldingTable"); + run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction"); } @Test @@ -476,9 +480,9 @@ public class IntegrationTestMTTR { public class ActionCallable implements Callable { - private final ChaosMonkey.Action action; + private final Action action; - public ActionCallable(ChaosMonkey.Action action) { + public ActionCallable(Action action) { this.action = action; } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java index aeebfe6..10b3e2d 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedList.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; +import java.util.Set; import java.util.UUID; import org.apache.commons.cli.CommandLine; @@ -37,12 +38,13 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.IntegrationTestBase; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.IntegrationTests; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; @@ -147,7 +149,7 @@ import org.junit.experimental.categories.Category; * This class can be run as a unit test, as an integration test, or from the command line */ @Category(IntegrationTests.class) -public class IntegrationTestBigLinkedList extends Configured implements Tool { +public class IntegrationTestBigLinkedList extends IntegrationTestBase { private static final byte[] NO_KEY = new byte[1]; protected static String TABLE_NAME_KEY = "IntegrationTestBigLinkedList.table"; @@ -182,9 +184,11 @@ public class IntegrationTestBigLinkedList extends Configured implements Tool { private static final int WIDTH_DEFAULT = 1000000; private static final int WRAP_DEFAULT = 25; - private static final int ROWKEY_LENGTH = 16; + private String toRun; + private String[] otherArgs; + static class CINode { byte[] key; 
byte[] prev; @@ -970,38 +974,29 @@ public class IntegrationTestBigLinkedList extends Configured implements Tool { protected IntegrationTestingUtility util; @Before + @Override public void setUp() throws Exception { - util = getTestingUtil(); + util = getTestingUtil(getConf()); util.initializeCluster(this.NUM_SLAVES_BASE); this.setConf(util.getConfiguration()); } @After - public void tearDown() throws Exception { + @Override + public void cleanUp() throws Exception { util.restoreCluster(); } @Test public void testContinuousIngest() throws IOException, Exception { //Loop - int ret = ToolRunner.run(getTestingUtil().getConfiguration(), new Loop(), + int ret = ToolRunner.run(getTestingUtil(getConf()).getConfiguration(), new Loop(), new String[] {"1", "1", "2000000", util.getDataTestDirOnTestFS("IntegrationTestBigLinkedList").toString(), "1"}); org.junit.Assert.assertEquals(0, ret); } - protected IntegrationTestingUtility getTestingUtil() { - if (this.util == null) { - if (getConf() == null) { - this.util = new IntegrationTestingUtility(); - } else { - this.util = new IntegrationTestingUtility(getConf()); - } - } - return util; - } - - private int printUsage() { + private void usage() { System.err.println("Usage: " + this.getClass().getSimpleName() + " COMMAND [COMMAND options]"); System.err.println(" where COMMAND is one of:"); System.err.println(""); @@ -1021,39 +1016,55 @@ public class IntegrationTestBigLinkedList extends Configured implements Tool { System.err.println(" Loop A program to Loop through Generator and"); System.err.println(" Verify steps"); System.err.println("\t "); - return 1; + System.err.flush(); } @Override - public int run(String[] args) throws Exception { + protected void processOptions(CommandLine cmd) { + super.processOptions(cmd); + String[] args = cmd.getArgs(); //get the class, run with the conf if (args.length < 1) { - return printUsage(); + printUsage(); + throw new RuntimeException("Incorrect Number of args."); } + toRun = args[0]; + otherArgs = Arrays.copyOfRange(args, 1, args.length); + } + + @Override + public int runTestFromCommandLine() throws Exception { + Tool tool = null; - if (args[0].equals("Generator")) { + if (toRun.equals("Generator")) { tool = new Generator(); - } else if (args[0].equals("Verify")) { + } else if (toRun.equals("Verify")) { tool = new Verify(); - } else if (args[0].equals("Loop")) { + } else if (toRun.equals("Loop")) { tool = new Loop(); - } else if (args[0].equals("Walker")) { + } else if (toRun.equals("Walker")) { tool = new Walker(); - } else if (args[0].equals("Print")) { + } else if (toRun.equals("Print")) { tool = new Print(); - } else if (args[0].equals("Delete")) { + } else if (toRun.equals("Delete")) { tool = new Delete(); } else { - return printUsage(); + usage(); + throw new RuntimeException("Unknown arg"); } - args = Arrays.copyOfRange(args, 1, args.length); - return ToolRunner.run(getConf(), tool, args); + return ToolRunner.run(getConf(), tool, otherArgs); } - public static void main(String[] args) throws Exception { - int ret = ToolRunner.run(HBaseConfiguration.create(), new IntegrationTestBigLinkedList(), args); - System.exit(ret); + @Override + public String getTablename() { + Configuration c = getConf(); + return c.get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME); + } + + @Override + protected Set getColumnFamilies() { + return null; } private static void setJobConf(Job job, int numMappers, long numNodes, @@ -1073,4 +1084,11 @@ public class IntegrationTestBigLinkedList extends Configured implements Tool { 
job.getConfiguration().setBoolean(ScannerCallable.LOG_SCANNER_ACTIVITY, true); job.getConfiguration().setInt(TableRecordReaderImpl.LOG_PER_ROW_COUNT, 100000); } + + public static void main(String[] args) throws Exception { + Configuration conf = HBaseConfiguration.create(); + IntegrationTestingUtility.setUseDistributedCluster(conf); + int ret = ToolRunner.run(conf, new IntegrationTestBigLinkedList(), args); + System.exit(ret); + } } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java deleted file mode 100644 index 60cfacc..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.test; - -import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.IntegrationTestingUtility; -import org.apache.hadoop.hbase.IntegrationTests; -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.ChaosMonkey.*; -import org.apache.hadoop.util.ToolRunner; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; - - -/** - * This is the same integration test as {@link IntegrationTestBigLinkedList} while killing the - * region servers and the master(s) randomly - */ -@Category(IntegrationTests.class) -public class IntegrationTestBigLinkedListWithChaosMonkey extends IntegrationTestBigLinkedList { - private static final Log LOG = LogFactory - .getLog(IntegrationTestBigLinkedListWithChaosMonkey.class); - - private ChaosMonkey monkey; - - public IntegrationTestBigLinkedListWithChaosMonkey() { - super(); - Configuration conf = getConf(); - if (conf != null) { - conf.set(TABLE_NAME_KEY, "IntegrationTestBigLinkedListWithChaosMonkey"); - } else { - this.getTestingUtil().getConfiguration() - .set(TABLE_NAME_KEY, "IntegrationTestBigLinkedListWithChaosMonkey"); - setConf(conf); - } - } - - @Before - public void setUp() throws Exception { - if (!getTestingUtil().isDistributedCluster()) { - this.NUM_SLAVES_BASE = 5; // only used in MiniCluster mode - } - super.setUp(); - - String tableName = getConf().get(TABLE_NAME_KEY, DEFAULT_TABLE_NAME); - - // Actions such as compact/flush a table/region, - // move one region around. They are not so destructive, - // can be executed more frequently. 
- Action[] actions1 = new Action[] { - new CompactTable(tableName, 0.5f), - new CompactRandomRegionOfTable(tableName, 0.6f), - new FlushTable(tableName), - new FlushRandomRegionOfTable(tableName), - new MoveRandomRegionOfTable(tableName) - }; - - // Actions such as split/merge/snapshot. - // They should not cause data loss, or unreliability - // such as region stuck in transition. - Action[] actions2 = new Action[] { - new SplitRandomRegionOfTable(tableName), - new MergeRandomAdjacentRegionsOfTable(tableName), - new SnapshotTable(tableName), - new MoveRegionsOfTable(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName), - new ChangeEncodingAction(tableName), - new ChangeVersionsAction(tableName) - }; - - monkey = new ChaosMonkey(util, - new PeriodicRandomActionPolicy(30 * 1000, actions1), - new PeriodicRandomActionPolicy(60 * 1000, actions2), - ChaosMonkey.getPolicyByName(ChaosMonkey.EVERY_MINUTE_RANDOM_ACTION_POLICY)); - LOG.info("Chaos Monkey Starting"); - monkey.start(); - } - - @After - public void tearDown() throws Exception { - if (monkey != null) { - monkey.stop("test has finished, that's why"); - monkey.waitForStop(); - } - super.tearDown(); - } - - @Test - public void testContinuousIngest() throws IOException, Exception { - // Loop - int ret = ToolRunner.run( - getTestingUtil().getConfiguration(), - new Loop(), - new String[] { "1", "1", "1000000", - util.getDataTestDirOnTestFS("IntegrationTestBigLinkedListWithChaosMonkey").toString(), - "1" }); - org.junit.Assert.assertEquals(0, ret); - } - - public static void main(String[] args) throws Exception { - - IntegrationTestBigLinkedListWithChaosMonkey test = - new IntegrationTestBigLinkedListWithChaosMonkey(); - IntegrationTestingUtility.setUseDistributedCluster(test.getTestingUtil().getConfiguration()); - // set minimum cluster size requirements - test.NUM_SLAVES_BASE = 3; - test.setUp(); - - int ret = -1; - - try { - // run the test - ret = ToolRunner.run(test.getTestingUtil().getConfiguration(), test, args); - }finally { - // Make 100% sure that the monkey stops. 
- test.tearDown(); - } - - System.exit(ret); - } -} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java index b827b67..dab964d 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadAndVerify.java @@ -17,27 +17,28 @@ */ package org.apache.hadoop.hbase.test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - import java.io.IOException; import java.util.Random; +import java.util.Set; import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.apache.commons.cli.CommandLine; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.IntegrationTestBase; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.IntegrationTests; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; @@ -56,14 +57,14 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; -import com.google.common.collect.Lists; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; /** * A large test which loads a lot of data that has internal references, and @@ -83,7 +84,7 @@ import com.google.common.collect.Lists; * Originally taken from Apache Bigtop. 
*/ @Category(IntegrationTests.class) -public class IntegrationTestLoadAndVerify extends Configured implements Tool { +public class IntegrationTestLoadAndVerify extends IntegrationTestBase { private static final String TEST_NAME = "IntegrationTestLoadAndVerify"; private static final byte[] TEST_FAMILY = Bytes.toBytes("f1"); private static final byte[] TEST_QUALIFIER = Bytes.toBytes("q1"); @@ -107,6 +108,8 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { private IntegrationTestingUtility util; + private String toRun = null; + private enum Counters { ROWS_WRITTEN, REFERENCES_WRITTEN, @@ -115,7 +118,7 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { @Before public void setUp() throws Exception { - util = getTestingUtil(); + util = getTestingUtil(getConf()); util.initializeCluster(3); this.setConf(util.getConfiguration()); getConf().setLong(NUM_TO_WRITE_KEY, NUM_TO_WRITE_DEFAULT / 100); @@ -123,11 +126,6 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { getConf().setInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT / 10); } - @After - public void tearDown() throws Exception { - util.restoreCluster(); - } - /** * Converts a "long" value between endian systems. * Borrowed from Apache Commons IO @@ -147,6 +145,12 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { ( ( ( value >> 56 ) & 0xff ) << 0 ); } + @Override + @After + public void cleanUp() throws Exception { + util.restoreCluster(); + } + public static class LoadMapper extends Mapper { @@ -353,7 +357,7 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TEST_NAME)); htd.addFamily(new HColumnDescriptor(TEST_FAMILY)); - HBaseAdmin admin = getTestingUtil().getHBaseAdmin(); + HBaseAdmin admin = getTestingUtil(getConf()).getHBaseAdmin(); int numPreCreate = 40; admin.createTable(htd, Bytes.toBytes(0L), Bytes.toBytes(-1L), numPreCreate); @@ -398,33 +402,41 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { System.err.println(" -Dverify.scannercaching= Number hbase scanner caching rows to read (default 50)"); } - public int run(String argv[]) throws Exception { - if (argv.length < 1 || argv.length > 1) { + + @Override + protected void processOptions(CommandLine cmd) { + super.processOptions(cmd); + + String[] args = cmd.getArgs(); + if (args == null || args.length < 1 || args.length > 1) { usage(); - return 1; + throw new RuntimeException("Incorrect Number of args."); } + toRun = args[0]; + } + public int runTestFromCommandLine() throws Exception { IntegrationTestingUtility.setUseDistributedCluster(getConf()); boolean doLoad = false; boolean doVerify = false; boolean doDelete = getConf().getBoolean("loadmapper.deleteAfter",true); int numPresplits = getConf().getInt("loadmapper.numPresplits", 40); - if (argv[0].equals("load")) { + if (toRun.equals("load")) { doLoad = true; - } else if (argv[0].equals("verify")) { + } else if (toRun.equals("verify")) { doVerify= true; - } else if (argv[0].equals("loadAndVerify")) { + } else if (toRun.equals("loadAndVerify")) { doLoad=true; doVerify= true; } else { - System.err.println("Invalid argument " + argv[0]); + System.err.println("Invalid argument " + toRun); usage(); return 1; } // create HTableDescriptor for specified table - String table = getConf().get(TABLE_NAME_KEY, TEST_NAME); + String table = getTablename(); HTableDescriptor htd = new 
HTableDescriptor(TableName.valueOf(table)); htd.addFamily(new HColumnDescriptor(TEST_FAMILY)); @@ -442,19 +454,19 @@ public class IntegrationTestLoadAndVerify extends Configured implements Tool { return 0; } - private IntegrationTestingUtility getTestingUtil() { - if (this.util == null) { - if (getConf() == null) { - this.util = new IntegrationTestingUtility(); - } else { - this.util = new IntegrationTestingUtility(getConf()); - } - } - return util; + @Override + public String getTablename() { + return getConf().get(TABLE_NAME_KEY, TEST_NAME); + } + + @Override + protected Set getColumnFamilies() { + return Sets.newHashSet(Bytes.toString(TEST_FAMILY)); } public static void main(String argv[]) throws Exception { Configuration conf = HBaseConfiguration.create(); + IntegrationTestingUtility.setUseDistributedCluster(conf); int ret = ToolRunner.run(conf, new IntegrationTestLoadAndVerify(), argv); System.exit(ret); } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java deleted file mode 100644 index cafceb7..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java +++ /dev/null @@ -1,1213 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase.util; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.Random; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.lang.RandomStringUtils; -import org.apache.commons.lang.math.RandomUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.HBaseCluster; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey; -import org.apache.hadoop.hbase.IntegrationTestingUtility; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.Stoppable; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.ToolRunner; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -/** - * A utility to injects faults in a running cluster. - *

- * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like - * - Select a random server to kill - * - Sleep for 5 sec - * - Start the server on the same host - * Actions can also be complex events, like rolling restart of all of the servers. - *

- * Policies on the other hand are responsible for executing the actions based on a strategy. - * The default policy is to execute a random action every minute based on predefined action - * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one - * policy can be active at any time. - *

- * Chaos monkey can be run from the command line, or can be invoked from integration tests. - * See {@link IntegrationTestDataIngestWithChaosMonkey} or other integration tests that use - * chaos monkey for code examples. - *

- * ChaosMonkey class is indeed inspired by the Netflix's same-named tool: - * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html - */ -public class ChaosMonkey extends AbstractHBaseTool implements Stoppable { - - private static final Log LOG = LogFactory.getLog(ChaosMonkey.class); - - private static final long ONE_SEC = 1000; - private static final long FIVE_SEC = 5 * ONE_SEC; - private static final long ONE_MIN = 60 * ONE_SEC; - private static final long TIMEOUT = ONE_MIN; - - final IntegrationTestingUtility util; - - /** - * Construct a new ChaosMonkey - * @param util the HBaseIntegrationTestingUtility already configured - * @param policies names of pre-defined policies to use - */ - public ChaosMonkey(IntegrationTestingUtility util, String... policies) { - this.util = util; - setPoliciesByName(policies); - } - - /** - * Construct a new ChaosMonkey - * @param util the HBaseIntegrationTestingUtility already configured - * @param policies custom policies to use - */ - public ChaosMonkey(IntegrationTestingUtility util, Policy... policies) { - this.util = util; - this.policies = policies; - } - - private void setPoliciesByName(String... policies) { - this.policies = new Policy[policies.length]; - for (int i=0; i < policies.length; i++) { - this.policies[i] = getPolicyByName(policies[i]); - } - } - - public static Policy getPolicyByName(String policy) { - return NAMED_POLICIES.get(policy); - } - - /** - * Context for Action's - */ - public static class ActionContext { - private IntegrationTestingUtility util; - - public ActionContext(IntegrationTestingUtility util) { - this.util = util; - } - - public IntegrationTestingUtility getHaseIntegrationTestingUtility() { - return util; - } - - public HBaseCluster getHBaseCluster() { - return util.getHBaseClusterInterface(); - } - } - - /** - * A (possibly mischievous) action that the ChaosMonkey can perform. - */ - public static class Action { - // TODO: interesting question - should actions be implemented inside - // ChaosMonkey, or outside? If they are inside (initial), the class becomes - // huge and all-encompassing; if they are outside ChaosMonkey becomes just - // a random task scheduler. For now, keep inside. - - protected ActionContext context; - protected HBaseCluster cluster; - protected ClusterStatus initialStatus; - protected ServerName[] initialServers; - - public void init(ActionContext context) throws IOException { - this.context = context; - cluster = context.getHBaseCluster(); - initialStatus = cluster.getInitialClusterStatus(); - Collection regionServers = initialStatus.getServers(); - initialServers = regionServers.toArray(new ServerName[regionServers.size()]); - } - - public void perform() throws Exception { }; - - // TODO: perhaps these methods should be elsewhere? 
- /** Returns current region servers */ - protected ServerName[] getCurrentServers() throws IOException { - Collection regionServers = cluster.getClusterStatus().getServers(); - if (regionServers == null || regionServers.size() <= 0) return new ServerName [] {}; - return regionServers.toArray(new ServerName[regionServers.size()]); - } - - protected void killMaster(ServerName server) throws IOException { - LOG.info("Killing master:" + server); - cluster.killMaster(server); - cluster.waitForMasterToStop(server, TIMEOUT); - LOG.info("Killed master server:" + server); - } - - protected void startMaster(ServerName server) throws IOException { - LOG.info("Starting master:" + server.getHostname()); - cluster.startMaster(server.getHostname()); - cluster.waitForActiveAndReadyMaster(TIMEOUT); - LOG.info("Started master: " + server); - } - - protected void killRs(ServerName server) throws IOException { - LOG.info("Killing region server:" + server); - cluster.killRegionServer(server); - cluster.waitForRegionServerToStop(server, TIMEOUT); - LOG.info("Killed region server:" + server + ". Reported num of rs:" - + cluster.getClusterStatus().getServersSize()); - } - - protected void startRs(ServerName server) throws IOException { - LOG.info("Starting region server:" + server.getHostname()); - cluster.startRegionServer(server.getHostname()); - cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT); - LOG.info("Started region server:" + server + ". Reported num of rs:" - + cluster.getClusterStatus().getServersSize()); - } - - protected void unbalanceRegions(ClusterStatus clusterStatus, - List fromServers, List toServers, - double fractionOfRegions) throws Exception { - List victimRegions = new LinkedList(); - for (ServerName server : fromServers) { - ServerLoad serverLoad = clusterStatus.getLoad(server); - // Ugh. 
- List regions = new LinkedList(serverLoad.getRegionsLoad().keySet()); - int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); - LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); - for (int i = 0; i < victimRegionCount; ++i) { - int victimIx = RandomUtils.nextInt(regions.size()); - String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); - victimRegions.add(Bytes.toBytes(regionId)); - } - } - - LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() - + " servers to " + toServers.size() + " different servers"); - HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - for (byte[] victimRegion : victimRegions) { - int targetIx = RandomUtils.nextInt(toServers.size()); - admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); - } - } - - protected void forceBalancer() throws Exception { - HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - boolean result = admin.balancer(); - if (!result) { - LOG.error("Balancer didn't succeed"); - } - } - } - - private static class RestartActionBase extends Action { - long sleepTime; // how long should we sleep - - public RestartActionBase(long sleepTime) { - this.sleepTime = sleepTime; - } - - void sleep(long sleepTime) { - LOG.info("Sleeping for:" + sleepTime); - Threads.sleep(sleepTime); - } - - void restartMaster(ServerName server, long sleepTime) throws IOException { - sleepTime = Math.max(sleepTime, 1000); - killMaster(server); - sleep(sleepTime); - startMaster(server); - } - - void restartRs(ServerName server, long sleepTime) throws IOException { - sleepTime = Math.max(sleepTime, 1000); - killRs(server); - sleep(sleepTime); - startRs(server); - } - } - - public static class RestartActiveMaster extends RestartActionBase { - public RestartActiveMaster(long sleepTime) { - super(sleepTime); - } - @Override - public void perform() throws Exception { - LOG.info("Performing action: Restart active master"); - - ServerName master = cluster.getClusterStatus().getMaster(); - restartMaster(master, sleepTime); - } - } - - public static class RestartRandomRs extends RestartActionBase { - public RestartRandomRs(long sleepTime) { - super(sleepTime); - } - - @Override - public void perform() throws Exception { - LOG.info("Performing action: Restart random region server"); - ServerName server = selectRandomItem(getCurrentServers()); - - restartRs(server, sleepTime); - } - } - - public static class RestartRsHoldingMeta extends RestartActionBase { - public RestartRsHoldingMeta(long sleepTime) { - super(sleepTime); - } - @Override - public void perform() throws Exception { - LOG.info("Performing action: Restart region server holding META"); - ServerName server = cluster.getServerHoldingMeta(); - if (server == null) { - LOG.warn("No server is holding .META. 
right now."); - return; - } - restartRs(server, sleepTime); - } - } - - public static class RestartRsHoldingTable extends RestartActionBase { - - private final String tableName; - - public RestartRsHoldingTable(long sleepTime, String tableName) { - super(sleepTime); - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HTable table = null; - try { - Configuration conf = context.getHaseIntegrationTestingUtility().getConfiguration(); - table = new HTable(conf, tableName); - } catch (IOException e) { - LOG.debug("Error creating HTable used to get list of region locations.", e); - return; - } - - Collection serverNames = table.getRegionLocations().values(); - ServerName[] nameArray = serverNames.toArray(new ServerName[serverNames.size()]); - - restartRs(nameArray[RandomUtils.nextInt(nameArray.length)], sleepTime); - } - } - - public static class MoveRegionsOfTable extends Action { - private final long sleepTime; - private final byte[] tableNameBytes; - private final String tableName; - - public MoveRegionsOfTable(String tableName) { - this(-1, tableName); - } - - public MoveRegionsOfTable(long sleepTime, String tableName) { - this.sleepTime = sleepTime; - this.tableNameBytes = Bytes.toBytes(tableName); - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - - List regions = admin.getTableRegions(tableNameBytes); - Collection serversList = admin.getClusterStatus().getServers(); - ServerName[] servers = serversList.toArray(new ServerName[serversList.size()]); - - LOG.info("Performing action: Move regions of table " + tableName); - for (HRegionInfo regionInfo:regions) { - try { - String destServerName = - servers[RandomUtils.nextInt(servers.length)].getServerName(); - LOG.debug("Moving " + regionInfo.getRegionNameAsString() + " to " + destServerName); - admin.move(regionInfo.getRegionName(), Bytes.toBytes(destServerName)); - } catch (Exception e) { - LOG.debug("Error moving region", e); - } - } - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class MoveRandomRegionOfTable extends Action { - private final long sleepTime; - private final byte[] tableNameBytes; - private final String tableName; - - public MoveRandomRegionOfTable(String tableName) { - this(-1, tableName); - } - - public MoveRandomRegionOfTable(long sleepTime, String tableName) { - this.sleepTime = sleepTime; - this.tableNameBytes = Bytes.toBytes(tableName); - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Move random region of table " + tableName); - List regions = admin.getTableRegions(tableNameBytes); - HRegionInfo region = selectRandomItem( - regions.toArray(new HRegionInfo[regions.size()])); - LOG.debug("Unassigning region " + region.getRegionNameAsString()); - admin.unassign(region.getRegionName(), false); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class SplitRandomRegionOfTable extends Action { - private final byte[] tableNameBytes; - private final long sleepTime; - private final String tableName; - - public SplitRandomRegionOfTable(String tableName) { - this(-1, tableName); - } - - public SplitRandomRegionOfTable(int sleepTime, String tableName) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.sleepTime 
-
-  public static class SplitRandomRegionOfTable extends Action {
-    private final byte[] tableNameBytes;
-    private final long sleepTime;
-    private final String tableName;
-
-    public SplitRandomRegionOfTable(String tableName) {
-      this(-1, tableName);
-    }
-
-    public SplitRandomRegionOfTable(int sleepTime, String tableName) {
-      this.tableNameBytes = Bytes.toBytes(tableName);
-      this.sleepTime = sleepTime;
-      this.tableName = tableName;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      HBaseAdmin admin = util.getHBaseAdmin();
-
-      LOG.info("Performing action: Split random region of table " + tableName);
-      List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
-      HRegionInfo region = selectRandomItem(
-        regions.toArray(new HRegionInfo[regions.size()]));
-      LOG.debug("Splitting region " + region.getRegionNameAsString());
-      admin.split(region.getRegionName());
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
-
-  public static class MergeRandomAdjacentRegionsOfTable extends Action {
-    private final byte[] tableNameBytes;
-    private final String tableName;
-    private final long sleepTime;
-
-    public MergeRandomAdjacentRegionsOfTable(String tableName) {
-      this(-1, tableName);
-    }
-
-    public MergeRandomAdjacentRegionsOfTable(int sleepTime, String tableName) {
-      this.tableNameBytes = Bytes.toBytes(tableName);
-      this.tableName = tableName;
-      this.sleepTime = sleepTime;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      HBaseAdmin admin = util.getHBaseAdmin();
-
-      LOG.info("Performing action: Merge random adjacent regions of table " + tableName);
-      List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
-      if (regions.size() < 2) {
-        LOG.info("Table " + tableName + " doesn't have enough region to merge");
-        return;
-      }
-
-      int i = RandomUtils.nextInt(regions.size() - 1);
-      HRegionInfo a = regions.get(i++);
-      HRegionInfo b = regions.get(i);
-      LOG.debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString());
-      admin.mergeRegions(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false);
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
-
-  public static class CompactTable extends Action {
-    private final byte[] tableNameBytes;
-    private final int majorRatio;
-    private final long sleepTime;
-    private final String tableName;
-
-    public CompactTable(
-        String tableName, float majorRatio) {
-      this(-1, tableName, majorRatio);
-    }
-
-    public CompactTable(
-        int sleepTime, String tableName, float majorRatio) {
-      this.tableNameBytes = Bytes.toBytes(tableName);
-      this.majorRatio = (int) (100 * majorRatio);
-      this.sleepTime = sleepTime;
-      this.tableName = tableName;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      HBaseAdmin admin = util.getHBaseAdmin();
-      boolean major = RandomUtils.nextInt(100) < majorRatio;
-
-      LOG.info("Performing action: Compact table " + tableName + ", major=" + major);
-      if (major) {
-        admin.majorCompact(tableNameBytes);
-      } else {
-        admin.compact(tableNameBytes);
-      }
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
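The majorRatio handling above converts the caller's float ratio to an integer percentage once, then rolls a 0-99 die on every invocation. The same arithmetic as a self-contained sketch (java.util.Random substituted for commons-lang RandomUtils so it runs standalone; the 0.3f ratio is made up):

    import java.util.Random;

    public class MajorRatioDemo {
      public static void main(String[] args) {
        int majorRatio = (int) (100 * 0.3f); // stored once as a percentage: 30
        Random random = new Random();
        int majors = 0, trials = 100000;
        for (int i = 0; i < trials; i++) {
          if (random.nextInt(100) < majorRatio) { // true ~30% of the time
            majors++;
          }
        }
        System.out.println("major compactions chosen: " + majors + "/" + trials);
      }
    }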
-
-  public static class CompactRandomRegionOfTable extends Action {
-    private final byte[] tableNameBytes;
-    private final int majorRatio;
-    private final long sleepTime;
-    private final String tableName;
-
-    public CompactRandomRegionOfTable(
-        String tableName, float majorRatio) {
-      this(-1, tableName, majorRatio);
-    }
-
-    public CompactRandomRegionOfTable(
-        int sleepTime, String tableName, float majorRatio) {
-      this.tableNameBytes = Bytes.toBytes(tableName);
-      this.majorRatio = (int) (100 * majorRatio);
-      this.sleepTime = sleepTime;
-      this.tableName = tableName;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      HBaseAdmin admin = util.getHBaseAdmin();
-      List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
-      boolean major = RandomUtils.nextInt(100) < majorRatio;
-
-      LOG.info("Performing action: Compact random region of table "
-        + tableName + ", major=" + major);
-      HRegionInfo region = selectRandomItem(
-        regions.toArray(new HRegionInfo[regions.size()]));
-
-      if (major) {
-        LOG.debug("Major compacting region " + region.getRegionNameAsString());
-        admin.majorCompact(region.getRegionName());
-      } else {
-        LOG.debug("Compacting region " + region.getRegionNameAsString());
-        admin.compact(region.getRegionName());
-      }
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
-
-  public static class FlushTable extends Action {
-    private final byte[] tableNameBytes;
-    private final long sleepTime;
-    private final String tableName;
-
-    public FlushTable(String tableName) {
-      this(-1, tableName);
-    }
-
-    public FlushTable(int sleepTime, String tableName) {
-      this.tableNameBytes = Bytes.toBytes(tableName);
-      this.sleepTime = sleepTime;
-      this.tableName = tableName;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      HBaseAdmin admin = util.getHBaseAdmin();
-
-      LOG.info("Performing action: Flush table " + tableName);
-      admin.flush(tableNameBytes);
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
-
-  public static class FlushRandomRegionOfTable extends Action {
-    private final byte[] tableNameBytes;
-    private final long sleepTime;
-    private final String tableName;
-
-    public FlushRandomRegionOfTable(String tableName) {
-      this (-1, tableName);
-    }
-
-    public FlushRandomRegionOfTable(int sleepTime, String tableName) {
-      this.tableNameBytes = Bytes.toBytes(tableName);
-      this.sleepTime = sleepTime;
-      this.tableName = tableName;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      HBaseAdmin admin = util.getHBaseAdmin();
-
-      LOG.info("Performing action: Flush random region of table " + tableName);
-      List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
-      HRegionInfo region = selectRandomItem(
-        regions.toArray(new HRegionInfo[regions.size()]));
-      LOG.debug("Flushing region " + region.getRegionNameAsString());
-      admin.flush(region.getRegionName());
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
-
-  public static class SnapshotTable extends Action {
-    private final String tableName;
-    private final long sleepTime;
-
-    public SnapshotTable(String tableName) {
-      this(-1, tableName);
-    }
-
-    public SnapshotTable(int sleepTime, String tableName) {
-      this.tableName = tableName;
-      this.sleepTime = sleepTime;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HBaseTestingUtility util = context.getHaseIntegrationTestingUtility();
-      String snapshotName = tableName + "-it-" + System.currentTimeMillis();
-      HBaseAdmin admin = util.getHBaseAdmin();
-
-      LOG.info("Performing action: Snapshot table " + tableName);
-      admin.snapshot(snapshotName, tableName);
-      if (sleepTime > 0) {
-        Thread.sleep(sleepTime);
-      }
-    }
-  }
-
-
-  /**
-   * Restarts a ratio of the running regionservers at the same time
-   */
-  public static class BatchRestartRs extends RestartActionBase {
-    float ratio; //ratio of regionservers to restart
-
-    public BatchRestartRs(long sleepTime, float ratio) {
-      super(sleepTime);
-      this.ratio = ratio;
-    }
-
-    @Override
-    public void perform() throws Exception {
-      LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
-          (int)(ratio * 100)));
-      List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
-
-      for (ServerName server : selectedServers) {
-        LOG.info("Killing region server:" + server);
-        cluster.killRegionServer(server);
-      }
-
-      for (ServerName server : selectedServers) {
-        cluster.waitForRegionServerToStop(server, TIMEOUT);
-      }
-
-      LOG.info("Killed " + selectedServers.size() + " region servers. Reported num of rs:"
-          + cluster.getClusterStatus().getServersSize());
-
-      sleep(sleepTime);
-
-      for (ServerName server : selectedServers) {
-        LOG.info("Starting region server:" + server.getHostname());
-        cluster.startRegionServer(server.getHostname());
-
-      }
-      for (ServerName server : selectedServers) {
-        cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT);
-      }
-      LOG.info("Started " + selectedServers.size() +" region servers. Reported num of rs:"
-          + cluster.getClusterStatus().getServersSize());
-    }
-  }
-
-  /**
-   * Restarts a ratio of the regionservers in a rolling fashion. At each step, either kills a
-   * server, or starts one, sleeping randomly (0-sleepTime) in between steps.
-   */
-  public static class RollingBatchRestartRs extends BatchRestartRs {
-    public RollingBatchRestartRs(long sleepTime, float ratio) {
-      super(sleepTime, ratio);
-    }
-
-    @Override
-    public void perform() throws Exception {
-      LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
-          (int)(ratio * 100)));
-      List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
-
-      Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
-      Queue<ServerName> deadServers = new LinkedList<ServerName>();
-
-      //
-      while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) {
-        boolean action = true; //action true = kill server, false = start server
-
-        if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) {
-          action = deadServers.isEmpty();
-        } else {
-          action = RandomUtils.nextBoolean();
-        }
-
-        if (action) {
-          ServerName server = serversToBeKilled.remove();
-          killRs(server);
-          deadServers.add(server);
-        } else {
-          ServerName server = deadServers.remove();
-          startRs(server);
-        }
-
-        sleep(RandomUtils.nextInt((int)sleepTime));
-      }
-    }
-  }
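The scheduling trick in RollingBatchRestartRs is that the kill-or-start choice is only forced when one of the two queues is empty; otherwise it is a coin flip, so part of the cluster stays up throughout. The decision loop in isolation, as a runnable simulation (server names are made-up strings):

    import java.util.LinkedList;
    import java.util.Queue;
    import java.util.Random;

    public class RollingRestartDemo {
      public static void main(String[] args) {
        Queue<String> toKill = new LinkedList<String>();
        for (int i = 0; i < 5; i++) {
          toKill.add("rs-" + i);
        }
        Queue<String> dead = new LinkedList<String>();
        Random random = new Random();

        while (!toKill.isEmpty() || !dead.isEmpty()) {
          // Forced to kill when nothing is dead, forced to start when
          // nothing is left to kill; otherwise flip a coin.
          boolean kill;
          if (toKill.isEmpty() || dead.isEmpty()) {
            kill = dead.isEmpty();
          } else {
            kill = random.nextBoolean();
          }
          if (kill) {
            String server = toKill.remove();
            System.out.println("kill  " + server);
            dead.add(server);
          } else {
            System.out.println("start " + dead.remove());
          }
        }
      }
    }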
- */ - public UnbalanceRegionsAction(double fractionOfRegions, double fractionOfServers) { - this.fractionOfRegions = fractionOfRegions; - this.fractionOfServers = fractionOfServers; - } - - @Override - public void perform() throws Exception { - LOG.info("Unbalancing regions"); - ClusterStatus status = this.cluster.getClusterStatus(); - List victimServers = new LinkedList(status.getServers()); - int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size()); - List targetServers = new ArrayList(targetServerCount); - for (int i = 0; i < targetServerCount; ++i) { - int victimIx = RandomUtils.nextInt(victimServers.size()); - targetServers.add(victimServers.remove(victimIx)); - } - unbalanceRegions(status, victimServers, targetServers, fractionOfRegions); - } - } - - public static class ForceBalancerAction extends Action { - @Override - public void perform() throws Exception { - LOG.info("Balancing regions"); - forceBalancer(); - } - } - - public static class AddColumnAction extends ChaosMonkey.Action { - - private byte[] tableName; - private HBaseAdmin admin; - - public AddColumnAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor columnDescriptor = null; - - while(columnDescriptor == null || - tableDescriptor.getFamily(columnDescriptor.getName()) != null) { - columnDescriptor = new HColumnDescriptor(RandomStringUtils.randomAlphabetic(5)); - } - - tableDescriptor.addFamily(columnDescriptor); - admin.modifyTable(tableName, tableDescriptor); - } - } - - public static class RemoveColumnAction extends ChaosMonkey.Action { - private byte[] tableName; - private HBaseAdmin admin; - private Random random; - - public RemoveColumnAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - random = new Random(); - } - - @Override - public void init(ActionContext context) throws IOException { - super.init(context); - this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); - - if (columnDescriptors.length <= 1) { - return; - } - - int index = random.nextInt(columnDescriptors.length); - while(columnDescriptors[index].getNameAsString().equals( - Bytes.toString(LoadTestTool.COLUMN_FAMILY))) { - index = random.nextInt(columnDescriptors.length); - } - - tableDescriptor.removeFamily(columnDescriptors[index].getName()); - - admin.modifyTable(tableName, tableDescriptor); - } - } - - public static class ChangeVersionsAction extends ChaosMonkey.Action { - private byte[] tableName; - private HBaseAdmin admin; - private Random random; - - public ChangeVersionsAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - random = new Random(); - } - - @Override - public void init(ActionContext context) throws IOException { - super.init(context); - this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); - - if ( columnDescriptors == null || columnDescriptors.length == 0) { - return; - } - - int versions = random.nextInt(3) + 1; - for(HColumnDescriptor 
-
-  public static class ChangeVersionsAction extends ChaosMonkey.Action {
-    private byte[] tableName;
-    private HBaseAdmin admin;
-    private Random random;
-
-    public ChangeVersionsAction(String tableName) {
-      this.tableName = Bytes.toBytes(tableName);
-      random = new Random();
-    }
-
-    @Override
-    public void init(ActionContext context) throws IOException {
-      super.init(context);
-      this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin();
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
-      HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies();
-
-      if ( columnDescriptors == null || columnDescriptors.length == 0) {
-        return;
-      }
-
-      int versions = random.nextInt(3) + 1;
-      for(HColumnDescriptor descriptor:columnDescriptors) {
-        descriptor.setMaxVersions(versions);
-        descriptor.setMinVersions(versions);
-      }
-
-      admin.modifyTable(tableName, tableDescriptor);
-    }
-  }
-
-  public static class ChangeEncodingAction extends ChaosMonkey.Action {
-    private byte[] tableName;
-    private HBaseAdmin admin;
-    private Random random;
-
-    public ChangeEncodingAction(String tableName) {
-      this.tableName = Bytes.toBytes(tableName);
-      random = new Random();
-    }
-
-    @Override
-    public void init(ActionContext context) throws IOException {
-      super.init(context);
-      this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin();
-    }
-
-    @Override
-    public void perform() throws Exception {
-      HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
-      HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies();
-
-      if (columnDescriptors == null || columnDescriptors.length == 0) {
-        return;
-      }
-
-      // possible DataBlockEncoding id's
-      int[] possibleIds = {0, 2, 3, 4, 6};
-      for (HColumnDescriptor descriptor : columnDescriptors) {
-        short id = (short) possibleIds[random.nextInt(possibleIds.length)];
-        descriptor.setDataBlockEncoding(DataBlockEncoding.getEncodingById(id));
-      }
-
-      admin.modifyTable(tableName, tableDescriptor);
-    }
-  }
-
-  /**
-   * A context for a Policy
-   */
-  public static class PolicyContext extends ActionContext {
-    public PolicyContext(IntegrationTestingUtility util) {
-      super(util);
-    }
-  }
-
-  /**
-   * A policy to introduce chaos to the cluster
-   */
-  public static abstract class Policy extends StoppableImplementation implements Runnable {
-    protected PolicyContext context;
-
-    public void init(PolicyContext context) throws Exception {
-      this.context = context;
-    }
-  }
-
-  /** A policy that runs multiple other policies one after the other */
-  public static class CompositeSequentialPolicy extends Policy {
-    private List<Policy> policies;
-    public CompositeSequentialPolicy(Policy... policies) {
-      this.policies = Arrays.asList(policies);
-    }
-
-    @Override
-    public void stop(String why) {
-      super.stop(why);
-      for (Policy p : policies) {
-        p.stop(why);
-      }
-    }
-
-    @Override
-    public void run() {
-      for (Policy p : policies) {
-        p.run();
-      }
-    }
-
-    @Override
-    public void init(PolicyContext context) throws Exception {
-      super.init(context);
-      for (Policy p : policies) {
-        p.init(context);
-      }
-    }
-  }
-
-  /** A policy which does stuff every time interval. */
-  public static abstract class PeriodicPolicy extends Policy {
-    private long periodMs;
-
-    public PeriodicPolicy(long periodMs) {
-      this.periodMs = periodMs;
-    }
-
-    @Override
-    public void run() {
-      // Add some jitter.
-      int jitter = RandomUtils.nextInt((int)periodMs);
-      LOG.info("Sleeping for " + jitter + " to add jitter");
-      Threads.sleep(jitter);
-
-      while (!isStopped()) {
-        long start = System.currentTimeMillis();
-        runOneIteration();
-
-        if (isStopped()) return;
-        long sleepTime = periodMs - (System.currentTimeMillis() - start);
-        if (sleepTime > 0) {
-          LOG.info("Sleeping for: " + sleepTime);
-          Threads.sleep(sleepTime);
-        }
-      }
-    }
-
-    protected abstract void runOneIteration();
-
-    @Override
-    public void init(PolicyContext context) throws Exception {
-      super.init(context);
-      LOG.info("Using ChaosMonkey Policy: " + this.getClass() + ", period: " + periodMs);
-    }
-  }
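These policy classes compose. A sketch, not part of the patch, of a sequence that performs a fixed set of actions once and then settles into random ones — DoActionsOncePolicy and PeriodicRandomActionPolicy are defined just below; the table name and one-minute periods are made up:

    // Illustrative composition of the policies in this file.
    ChaosMonkey.Policy policy = new ChaosMonkey.CompositeSequentialPolicy(
        new ChaosMonkey.DoActionsOncePolicy(60 * 1000,
            new ChaosMonkey.FlushTable("my_table"),
            new ChaosMonkey.SnapshotTable("my_table")),
        new ChaosMonkey.PeriodicRandomActionPolicy(60 * 1000,
            new ChaosMonkey.CompactTable("my_table", 0.5f)));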
-
-
-  /** A policy which performs a sequence of actions deterministically. */
-  public static class DoActionsOncePolicy extends PeriodicPolicy {
-    private List<Action> actions;
-
-    public DoActionsOncePolicy(long periodMs, List<Action> actions) {
-      super(periodMs);
-      this.actions = new ArrayList<Action>(actions);
-    }
-
-    public DoActionsOncePolicy(long periodMs, Action... actions) {
-      this(periodMs, Arrays.asList(actions));
-    }
-
-    @Override
-    protected void runOneIteration() {
-      if (actions.isEmpty()) {
-        this.stop("done");
-        return;
-      }
-      Action action = actions.remove(0);
-
-      try {
-        action.perform();
-      } catch (Exception ex) {
-        LOG.warn("Exception occured during performing action: "
-            + StringUtils.stringifyException(ex));
-      }
-    }
-
-    @Override
-    public void init(PolicyContext context) throws Exception {
-      super.init(context);
-      for (Action action : actions) {
-        action.init(this.context);
-      }
-    }
-  }
-
-  /**
-   * A policy, which picks a random action according to the given weights,
-   * and performs it every configurable period.
-   */
-  public static class PeriodicRandomActionPolicy extends PeriodicPolicy {
-    private List<Pair<Action, Integer>> actions;
-
-    public PeriodicRandomActionPolicy(long periodMs, List<Pair<Action, Integer>> actions) {
-      super(periodMs);
-      this.actions = actions;
-    }
-
-    public PeriodicRandomActionPolicy(long periodMs, Pair<Action, Integer>... actions) {
-      // We don't expect it to be modified.
-      this(periodMs, Arrays.asList(actions));
-    }
-
-    public PeriodicRandomActionPolicy(long periodMs, Action... actions) {
-      super(periodMs);
-      this.actions = new ArrayList<Pair<Action, Integer>>(actions.length);
-      for (Action action : actions) {
-        this.actions.add(new Pair<Action, Integer>(action, 1));
-      }
-    }
-
-    @Override
-    protected void runOneIteration() {
-      Action action = selectWeightedRandomItem(actions);
-      try {
-        action.perform();
-      } catch (Exception ex) {
-        LOG.warn("Exception occured during performing action: "
-            + StringUtils.stringifyException(ex));
-      }
-    }
-
-    @Override
-    public void init(PolicyContext context) throws Exception {
-      super.init(context);
-      for (Pair<Action, Integer> action : actions) {
-        action.getFirst().init(this.context);
-      }
-    }
-  }
-
-  /** Selects a random item from the given items */
-  static <T> T selectRandomItem(T[] items) {
-    return items[RandomUtils.nextInt(items.length)];
-  }
-
-  /** Selects a random item from the given items with weights*/
-  static <T> T selectWeightedRandomItem(List<Pair<T, Integer>> items) {
-    int totalWeight = 0;
-    for (Pair<T, Integer> pair : items) {
-      totalWeight += pair.getSecond();
-    }
-
-    int cutoff = RandomUtils.nextInt(totalWeight);
-    int cummulative = 0;
-    T item = null;
-
-    //warn: O(n)
-    for (int i=0; i<items.size(); i++) {
-      int curWeight = items.get(i).getSecond();
-      if (cutoff < cummulative + curWeight) {
-        item = items.get(i).getFirst();
-        break;
-      }
-      cummulative += curWeight;
-    }
-
-    return item;
-  }
-
-  /** Selects and returns ceil(ratio * items.length) random items from the given array */
-  static <T> List<T> selectRandomItems(T[] items, float ratio) {
-    int remaining = (int)Math.ceil(items.length * ratio);
-
-    List<T> selectedItems = new ArrayList<T>(remaining);
-
-    for (int i=0; i<items.length && remaining > 0; i++) {
-      if (RandomUtils.nextFloat() < ((float)remaining/(items.length-i))) {
-        selectedItems.add(items[i]);
-        remaining--;
-      }
-    }
-
-    return selectedItems;
-  }
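selectWeightedRandomItem draws one uniform cutoff in [0, totalWeight) and walks the prefix sums, so an item of weight w is picked with probability w/totalWeight. The same walk as a standalone sketch (items and weights are made up; plain java.util.Random instead of commons-lang):

    import java.util.Random;

    public class WeightedPickDemo {
      public static void main(String[] args) {
        String[] items = {"restart-master", "restart-rs", "batch-restart"};
        int[] weights = {2, 2, 1};
        int totalWeight = 0;
        for (int w : weights) {
          totalWeight += w; // 5
        }

        int cutoff = new Random().nextInt(totalWeight);
        int cumulative = 0;
        String picked = null;
        for (int i = 0; i < items.length; i++) {
          if (cutoff < cumulative + weights[i]) { // cutoff falls in this item's band
            picked = items[i];
            break;
          }
          cumulative += weights[i];
        }
        System.out.println(picked + " (cutoff=" + cutoff + " of " + totalWeight + ")");
      }
    }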
-
-  /**
-   * All actions that deal with RS's with the following weights (relative probabilities):
-   *  - Restart active master (sleep 5 sec)                    : 2
-   *  - Restart random regionserver (sleep 5 sec)              : 2
-   *  - Restart random regionserver (sleep 60 sec)             : 2
-   *  - Restart META regionserver (sleep 5 sec)                : 1
-   *  - Batch restart of 50% of regionservers (sleep 5 sec)    : 2
-   *  - Rolling restart of 100% of regionservers (sleep 5 sec) : 2
-   */
-  @SuppressWarnings("unchecked")
-  private static final List<Pair<Action, Integer>> ALL_ACTIONS = Lists.newArrayList(
-      new Pair<Action, Integer>(new RestartActiveMaster(FIVE_SEC), 2),
-      new Pair<Action, Integer>(new RestartRandomRs(FIVE_SEC), 2),
-      new Pair<Action, Integer>(new RestartRandomRs(ONE_MIN), 2),
-      new Pair<Action, Integer>(new RestartRsHoldingMeta(FIVE_SEC), 1),
-      new Pair<Action, Integer>(new BatchRestartRs(FIVE_SEC, 0.5f), 2),
-      new Pair<Action, Integer>(new RollingBatchRestartRs(FIVE_SEC, 1.0f), 2)
-  );
-
-  public static final String EVERY_MINUTE_RANDOM_ACTION_POLICY = "EVERY_MINUTE_RANDOM_ACTION_POLICY";
-
-  private Policy[] policies;
-  private Thread[] monkeyThreads;
-
-  public void start() throws Exception {
-    monkeyThreads = new Thread[policies.length];
-
-    for (int i=0; i<policies.length; i++) {
-      policies[i].init(new PolicyContext(this.util));
-      Thread monkeyThread = new Thread(policies[i]);
-      monkeyThread.start();
-      monkeyThreads[i] = monkeyThread;
-    }
-  }
-
-  public void waitForStop() throws InterruptedException {
-    for (Thread monkeyThread : monkeyThreads) {
-      monkeyThread.join();
-    }
-  }
-
-  public void stop(String why) {
-    for (Policy policy : policies) {
-      policy.stop(why);
-    }
-  }
-
-  public boolean isStopped() {
-    return policies[0].isStopped();
-  }
-
-  private void setPoliciesByName(String... policies) {
-    this.policies = new Policy[policies.length];
-    for (int i=0; i < policies.length; i++) {
-      this.policies[i] = NAMED_POLICIES.get(policies[i]);
-    }
-  }
-
-  private static final Map<String, Policy> NAMED_POLICIES = Maps.newHashMap();
-  static {
-    NAMED_POLICIES.put(EVERY_MINUTE_RANDOM_ACTION_POLICY,
-        new PeriodicRandomActionPolicy(ONE_MIN, ALL_ACTIONS));
-  }
-
-  @Override
-  protected void addOptions() {
-    addOptWithArg("policy", "a named policy defined in ChaosMonkey.java. Possible values: "
-        + NAMED_POLICIES.keySet());
-    //we can add more options, and make policies more configurable
-  }
-
-  @Override
-  protected void processOptions(CommandLine cmd) {
-    String[] policies = cmd.getOptionValues("policy");
-    if (policies != null) {
-      setPoliciesByName(policies);
-    }
-  }
-
-  @Override
-  protected int doWork() throws Exception {
-    start();
-    waitForStop();
-    return 0;
-  }
-
-  public static void main(String[] args) throws Exception {
-    Configuration conf = HBaseConfiguration.create();
-    IntegrationTestingUtility.setUseDistributedCluster(conf);
-    IntegrationTestingUtility util = new IntegrationTestingUtility(conf);
-    util.initializeCluster(1);
-
-    ChaosMonkey monkey = new ChaosMonkey(util, EVERY_MINUTE_RANDOM_ACTION_POLICY);
-    int ret = ToolRunner.run(conf, monkey, args);
-    System.exit(ret);
-  }
-}
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
index 1e6c254..229470f 100644
--- hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java
@@ -56,7 +56,7 @@ public class LoadTestTool extends AbstractHBaseTool {
   protected static final String DEFAULT_TABLE_NAME = "cluster_test";
 
   /** Column family used by the test */
-  protected static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf");
+  public static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf");
 
   /** Column families used by the test */
   protected static final byte[][] COLUMN_FAMILIES = { COLUMN_FAMILY };
-- 
1.7.10.2 (Apple Git-33)