From ce42b613b12c59c542f8c1826f747c31faa850de Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Tue, 13 Aug 2013 13:37:35 -0700 Subject: [PATCH] Starting to make Chaos monkey more easily configurable. --- ...IntegrationTestDataIngestSlowDeterministic.java | 70 +- .../IntegrationTestDataIngestWithChaosMonkey.java | 109 -- ...grationTestRebalanceAndKillServersTargeted.java | 55 +- .../apache/hadoop/hbase/chaos/actions/Action.java | 150 +++ .../hbase/chaos/actions/AddColumnAction.java | 63 + .../hbase/chaos/actions/BatchRestartRsAction.java | 69 ++ .../hbase/chaos/actions/ChangeEncodingAction.java | 69 ++ .../hbase/chaos/actions/ChangeVersionsAction.java | 69 ++ .../actions/CompactRandomRegionOfTableAction.java | 75 ++ .../hbase/chaos/actions/CompactTableAction.java | 63 + .../actions/FlushRandomRegionOfTableAction.java | 62 + .../hbase/chaos/actions/FlushTableAction.java | 54 + .../hbase/chaos/actions/ForceBalancerAction.java | 30 + .../MergeRandomAdjacentRegionsOfTableAction.java | 68 ++ .../actions/MoveRandomRegionOfTableAction.java | 62 + .../chaos/actions/MoveRegionsOfTableAction.java | 71 ++ .../hbase/chaos/actions/RemoveColumnAction.java | 70 ++ .../chaos/actions/RestartActionBaseAction.java | 54 + .../chaos/actions/RestartActiveMasterAction.java | 37 + .../hbase/chaos/actions/RestartRandomRsAction.java | 39 + .../chaos/actions/RestartRsHoldingMetaAction.java | 40 + .../chaos/actions/RestartRsHoldingTableAction.java | 57 + .../chaos/actions/RollingBatchRestartRsAction.java | 70 ++ .../hbase/chaos/actions/SnapshotTableAction.java | 52 + .../actions/SplitRandomRegionOfTableAction.java | 62 + .../actions/UnbalanceKillAndRebalanceAction.java | 50 + .../chaos/actions/UnbalanceRegionsAction.java | 60 + .../hbase/chaos/factories/CalmMonkeyFactory.java | 15 + .../hbase/chaos/factories/MonkeyFactory.java | 49 + .../factories/SlowDeterministicMonkeyFactory.java | 73 ++ .../hbase/chaos/monkies/CalmChaosMonkey.java | 27 + .../hadoop/hbase/chaos/monkies/ChaosMonkey.java | 
16 + .../chaos/monkies/PolicyBasedChaosMonkey.java | 164 +++ .../chaos/policies/CompositeSequentialPolicy.java | 53 + .../hbase/chaos/policies/DoActionsOncePolicy.java | 64 ++ .../hbase/chaos/policies/PeriodicPolicy.java | 59 + .../chaos/policies/PeriodicRandomActionPolicy.java | 73 ++ .../apache/hadoop/hbase/chaos/policies/Policy.java | 48 + .../hadoop/hbase/mttr/IntegrationTestMTTR.java | 30 +- ...ntegrationTestBigLinkedListWithChaosMonkey.java | 41 +- .../org/apache/hadoop/hbase/util/ChaosMonkey.java | 1213 -------------------- .../org/apache/hadoop/hbase/util/LoadTestTool.java | 2 +- 42 files changed, 2200 insertions(+), 1457 deletions(-) delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java create mode 100644 
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java create mode 100644 
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/CompositeSequentialPolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java delete mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java index a3ab48c..38f3dd9 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestSlowDeterministic.java @@ -17,25 +17,10 @@ */ package org.apache.hadoop.hbase; -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.ChaosMonkey.Action; -import org.apache.hadoop.hbase.util.ChaosMonkey.BatchRestartRs; -import org.apache.hadoop.hbase.util.ChaosMonkey.CompactRandomRegionOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.CompactTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.CompositeSequentialPolicy; -import org.apache.hadoop.hbase.util.ChaosMonkey.DoActionsOncePolicy; -import org.apache.hadoop.hbase.util.ChaosMonkey.FlushRandomRegionOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.FlushTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.MergeRandomAdjacentRegionsOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.MoveRandomRegionOfTable; -import 
org.apache.hadoop.hbase.util.ChaosMonkey.MoveRegionsOfTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.PeriodicRandomActionPolicy; -import org.apache.hadoop.hbase.util.ChaosMonkey.RestartActiveMaster; -import org.apache.hadoop.hbase.util.ChaosMonkey.RestartRandomRs; -import org.apache.hadoop.hbase.util.ChaosMonkey.RestartRsHoldingMeta; -import org.apache.hadoop.hbase.util.ChaosMonkey.RollingBatchRestartRs; -import org.apache.hadoop.hbase.util.ChaosMonkey.SnapshotTable; -import org.apache.hadoop.hbase.util.ChaosMonkey.SplitRandomRegionOfTable; +import com.google.common.collect.Sets; +import org.apache.hadoop.hbase.chaos.factories.MonkeyFactory; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.LoadTestTool; import org.junit.After; import org.junit.Before; @@ -58,47 +43,12 @@ public class IntegrationTestDataIngestSlowDeterministic extends IngestIntegratio @Before public void setUp() throws Exception { super.setUp(SERVER_COUNT); - - // Actions such as compact/flush a table/region, - // move one region around. They are not so destructive, - // can be executed more frequently. - Action[] actions1 = new Action[] { - new CompactTable(tableName, 0.5f), - new CompactRandomRegionOfTable(tableName, 0.6f), - new FlushTable(tableName), - new FlushRandomRegionOfTable(tableName), - new MoveRandomRegionOfTable(tableName) - }; - - // Actions such as split/merge/snapshot. - // They should not cause data loss, or unreliability - // such as region stuck in transition. - Action[] actions2 = new Action[] { - new SplitRandomRegionOfTable(tableName), - new MergeRandomAdjacentRegionsOfTable(tableName), - new SnapshotTable(tableName), - new ChaosMonkey.AddColumnAction(tableName), - new ChaosMonkey.RemoveColumnAction(tableName), - new ChaosMonkey.ChangeEncodingAction(tableName), - new ChaosMonkey.ChangeVersionsAction(tableName) - }; - - // Destructive actions to mess things around. 
- Action[] actions3 = new Action[] { - new MoveRegionsOfTable(tableName), - new RestartRandomRs(60000), - new BatchRestartRs(5000, 0.5f), - new RestartActiveMaster(5000), - new RollingBatchRestartRs(5000, 1.0f), - new RestartRsHoldingMeta(35000) - }; - - monkey = new ChaosMonkey(util, - new PeriodicRandomActionPolicy(60 * 1000, actions1), - new PeriodicRandomActionPolicy(90 * 1000, actions2), - new CompositeSequentialPolicy( - new DoActionsOncePolicy(150 * 1000, actions3), - new PeriodicRandomActionPolicy(150 * 1000, actions3))); + monkey = MonkeyFactory.getFactory("slowDeterministic") + .setTableName(tableName) + .setUtil(util) + .setColumnFamilies( + Sets.newHashSet(Bytes.toString(LoadTestTool.COLUMN_FAMILY) ) + ).build(); monkey.start(); } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java deleted file mode 100644 index 9bc0e56..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestDataIngestWithChaosMonkey.java +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.ChaosMonkey.*; -import org.apache.hadoop.hbase.util.LoadTestTool; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -/** - * A system test which does large data ingestion and verify using {@link LoadTestTool}, - * while killing the region servers and the master(s) randomly. You can configure how long - * should the load test run by using "hbase.IntegrationTestDataIngestWithChaosMonkey.runtime" - * configuration parameter. - */ -@Category(IntegrationTests.class) -public class IntegrationTestDataIngestWithChaosMonkey extends IngestIntegrationTestBase { - - private static int NUM_SLAVES_BASE = 4; //number of slaves for the smallest cluster - - // run for 10 min by default - private static final long DEFAULT_RUN_TIME = 10 * 60 * 1000; - - private ChaosMonkey monkey; - - @Before - public void setUp() throws Exception { - util= getTestingUtil(null); - Configuration conf = util.getConfiguration(); - if (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, - HConstants.DEFAULT_DISTRIBUTED_LOG_REPLAY_CONFIG)) { - // when distributedLogReplay is enabled, we need to make sure rpc timeout & retires are - // smaller enough in order for the replay can complete before ChaosMonkey kills another region - // server - conf.setInt("hbase.log.replay.retries.number", 2); - conf.setInt("hbase.log.replay.rpc.timeout", 2000); - conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true); - } - if(!util.isDistributedCluster()) { - NUM_SLAVES_BASE = 5; - } - super.setUp(NUM_SLAVES_BASE); - - // Actions such as compact/flush a table/region, - // move one region around. They are not so destructive, - // can be executed more frequently. 
- Action[] actions1 = new Action[] { - new CompactTable(tableName, 0.5f), - new CompactRandomRegionOfTable(tableName, 0.6f), - new FlushTable(tableName), - new FlushRandomRegionOfTable(tableName), - new MoveRandomRegionOfTable(tableName) - }; - - // Actions such as split/merge/snapshot. - // They should not cause data loss, or unreliability - // such as region stuck in transition. - Action[] actions2 = new Action[] { - new SplitRandomRegionOfTable(tableName), - new MergeRandomAdjacentRegionsOfTable(tableName), - new SnapshotTable(tableName), - new MoveRegionsOfTable(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName), - new ChangeEncodingAction(tableName), - new ChangeVersionsAction(tableName) - }; - - monkey = new ChaosMonkey(util, - new PeriodicRandomActionPolicy(30 * 1000, actions1), - new PeriodicRandomActionPolicy(60 * 1000, actions2), - ChaosMonkey.getPolicyByName(ChaosMonkey.EVERY_MINUTE_RANDOM_ACTION_POLICY)); - monkey.start(); - } - - @After - public void tearDown() throws Exception { - if (monkey != null) { - monkey.stop("test has finished, that's why"); - monkey.waitForStop(); - } - super.tearDown(); - } - - @Test - public void testDataIngest() throws Exception { - runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 100, 20); - } -} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java index d302ebe..5880d5d 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.java @@ -18,17 +18,15 @@ package org.apache.hadoop.hbase; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -import org.apache.commons.lang.math.RandomUtils; import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.hbase.chaos.actions.UnbalanceKillAndRebalanceAction; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; +import org.apache.hadoop.hbase.chaos.policies.Policy; import org.apache.hadoop.hbase.client.HConnectionManager; -import org.apache.hadoop.hbase.util.ChaosMonkey; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; import org.apache.hadoop.hbase.util.LoadTestTool; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -54,54 +52,15 @@ public class IntegrationTestRebalanceAndKillServersTargeted extends IngestIntegr private ChaosMonkey monkey; - /** This action is too specific to put in ChaosMonkey; put it here */ - static class UnbalanceKillAndRebalanceAction extends ChaosMonkey.Action { - /** Fractions of servers to get regions and live and die respectively; from all other - * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */ - private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1; - private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1; - private static final double HOARD_FRC_OF_REGIONS = 0.8; - /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance - * and restarting the servers; to make sure these events have time to impact the cluster. 
*/ - private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000; - private static final long WAIT_FOR_KILLS_MS = 2 * 1000; - private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000; - - @Override - public void perform() throws Exception { - ClusterStatus status = this.cluster.getClusterStatus(); - List victimServers = new LinkedList(status.getServers()); - int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); - int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); - Assert.assertTrue((liveCount + deadCount) < victimServers.size()); - List targetServers = new ArrayList(liveCount); - for (int i = 0; i < liveCount + deadCount; ++i) { - int victimIx = RandomUtils.nextInt(victimServers.size()); - targetServers.add(victimServers.remove(victimIx)); - } - unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); - Thread.sleep(WAIT_FOR_UNBALANCE_MS); - for (int i = 0; i < liveCount; ++i) { - killRs(targetServers.get(i)); - } - Thread.sleep(WAIT_FOR_KILLS_MS); - forceBalancer(); - Thread.sleep(WAIT_AFTER_BALANCE_MS); - for (int i = 0; i < liveCount; ++i) { - startRs(targetServers.get(i)); - } - } - } - @Before public void setUp() throws Exception { Configuration conf = HBaseConfiguration.create(); conf.set(HConnectionManager.RETRIES_BY_SERVER_KEY, "true"); super.setUp(NUM_SLAVES_BASE, conf); - ChaosMonkey.Policy chaosPolicy = new ChaosMonkey.PeriodicRandomActionPolicy( + Policy chaosPolicy = new PeriodicRandomActionPolicy( CHAOS_EVERY_MS, new UnbalanceKillAndRebalanceAction()); - monkey = new ChaosMonkey(util, chaosPolicy); + monkey = new PolicyBasedChaosMonkey(util, chaosPolicy); monkey.start(); } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java new file mode 100644 index 0000000..3146499 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java 
@@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.HBaseCluster; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.ServerLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.IOException; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; + +/** + * A (possibly mischievous) action that the ChaosMonkey can perform. 
+ */ +public class Action { + + protected static Log LOG = LogFactory.getLog(Action.class); + + protected ActionContext context; + protected HBaseCluster cluster; + protected ClusterStatus initialStatus; + protected ServerName[] initialServers; + + public void init(ActionContext context) throws IOException { + this.context = context; + cluster = context.getHBaseCluster(); + initialStatus = cluster.getInitialClusterStatus(); + Collection regionServers = initialStatus.getServers(); + initialServers = regionServers.toArray(new ServerName[regionServers.size()]); + } + + public void perform() throws Exception { }; + + /** Returns current region servers */ + protected ServerName[] getCurrentServers() throws IOException { + Collection regionServers = cluster.getClusterStatus().getServers(); + if (regionServers == null || regionServers.size() <= 0) return new ServerName [] {}; + return regionServers.toArray(new ServerName[regionServers.size()]); + } + + protected void killMaster(ServerName server) throws IOException { + LOG.info("Killing master:" + server); + cluster.killMaster(server); + cluster.waitForMasterToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Killed master server:" + server); + } + + protected void startMaster(ServerName server) throws IOException { + LOG.info("Starting master:" + server.getHostname()); + cluster.startMaster(server.getHostname()); + cluster.waitForActiveAndReadyMaster(PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Started master: " + server); + } + + protected void killRs(ServerName server) throws IOException { + LOG.info("Killing region server:" + server); + cluster.killRegionServer(server); + cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Killed region server:" + server + ". 
Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + } + + protected void startRs(ServerName server) throws IOException { + LOG.info("Starting region server:" + server.getHostname()); + cluster.startRegionServer(server.getHostname()); + cluster.waitForRegionServerToStart(server.getHostname(), PolicyBasedChaosMonkey.TIMEOUT); + LOG.info("Started region server:" + server + ". Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + } + + protected void unbalanceRegions(ClusterStatus clusterStatus, + List fromServers, List toServers, + double fractionOfRegions) throws Exception { + List victimRegions = new LinkedList(); + for (ServerName server : fromServers) { + ServerLoad serverLoad = clusterStatus.getLoad(server); + // Ugh. + List regions = new LinkedList(serverLoad.getRegionsLoad().keySet()); + int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); + LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); + for (int i = 0; i < victimRegionCount; ++i) { + int victimIx = RandomUtils.nextInt(regions.size()); + String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); + victimRegions.add(Bytes.toBytes(regionId)); + } + } + + LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() + + " servers to " + toServers.size() + " different servers"); + HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + for (byte[] victimRegion : victimRegions) { + int targetIx = RandomUtils.nextInt(toServers.size()); + admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); + } + } + + protected void forceBalancer() throws Exception { + HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + boolean result = admin.balancer(); + if (!result) { + LOG.error("Balancer didn't succeed"); + } + } + + /** + * Context for Action's + */ + public static class ActionContext { + private 
IntegrationTestingUtility util; + + public ActionContext(IntegrationTestingUtility util) { + this.util = util; + } + + public IntegrationTestingUtility getHaseIntegrationTestingUtility() { + return util; + } + + public HBaseCluster getHBaseCluster() { + return util.getHBaseClusterInterface(); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java new file mode 100644 index 0000000..73129b2 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/AddColumnAction.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.RandomStringUtils; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.IOException; +import java.util.List; +import java.util.Random; + +/** + * Action the adds a column family to a table. 
+ */ +public class AddColumnAction extends Action { + private final Random random; + private final byte[] tableName; + private HBaseAdmin admin; + + public AddColumnAction(String tableName) { + this.tableName = Bytes.toBytes(tableName); + this.random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor columnDescriptor = null; + + while(columnDescriptor == null || + tableDescriptor.getFamily(columnDescriptor.getName()) != null) { + columnDescriptor = new HColumnDescriptor(RandomStringUtils.randomAlphabetic(5)); + } + + tableDescriptor.addFamily(columnDescriptor); + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java new file mode 100644 index 0000000..7931552 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; + +import java.util.List; + +/** + * Restarts a ratio of the running regionservers at the same time + */ +public class BatchRestartRsAction extends RestartActionBaseAction { + float ratio; //ratio of regionservers to restart + + public BatchRestartRsAction(long sleepTime, float ratio) { + super(sleepTime); + this.ratio = ratio; + } + + @Override + public void perform() throws Exception { + LOG.info(String.format("Performing action: Batch restarting %d%% of region servers", + (int)(ratio * 100))); + List selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), + ratio); + + for (ServerName server : selectedServers) { + LOG.info("Killing region server:" + server); + cluster.killRegionServer(server); + } + + for (ServerName server : selectedServers) { + cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + } + + LOG.info("Killed " + selectedServers.size() + " region servers. Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + + sleep(sleepTime); + + for (ServerName server : selectedServers) { + LOG.info("Starting region server:" + server.getHostname()); + cluster.startRegionServer(server.getHostname()); + + } + for (ServerName server : selectedServers) { + cluster.waitForRegionServerToStart(server.getHostname(), PolicyBasedChaosMonkey.TIMEOUT); + } + LOG.info("Started " + selectedServers.size() +" region servers. 
Reported num of rs:" + + cluster.getClusterStatus().getServersSize()); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java new file mode 100644 index 0000000..6dd0516 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeEncodingAction.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.IOException; +import java.util.List; +import java.util.Random; + +/** + * Action that changes the encoding on a column family from a list of tables. 
+ */ +public class ChangeEncodingAction extends Action { + private final byte[] tableName; + + private HBaseAdmin admin; + private Random random; + + public ChangeEncodingAction(String tableName) { + this.tableName = Bytes.toBytes(tableName); + this.random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); + + if (columnDescriptors == null || columnDescriptors.length == 0) { + return; + } + + // possible DataBlockEncoding id's + int[] possibleIds = {0, 2, 3, 4, 6}; + for (HColumnDescriptor descriptor : columnDescriptors) { + short id = (short) possibleIds[random.nextInt(possibleIds.length)]; + descriptor.setDataBlockEncoding(DataBlockEncoding.getEncodingById(id)); + } + + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java new file mode 100644 index 0000000..cbdcce9 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ChangeVersionsAction.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.IOException; +import java.util.List; +import java.util.Random; + +/** + * Action that changes the number of versions on a column family from a list of tables. + * + * Always keeps at least 1 as the number of versions. + */ +public class ChangeVersionsAction extends Action { + private final byte[] tableName; + + private HBaseAdmin admin; + private Random random; + + public ChangeVersionsAction(String tableName) { + this.tableName = Bytes.toBytes(tableName); + this.random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); + + if ( columnDescriptors == null || columnDescriptors.length == 0) { + return; + } + + int versions = random.nextInt(3) + 1; + for(HColumnDescriptor descriptor:columnDescriptors) { + descriptor.setMaxVersions(versions); + descriptor.setMinVersions(versions); + } + + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git 
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java new file mode 100644 index 0000000..da41d23 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactRandomRegionOfTableAction.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; +
import java.util.List; +
/** + * Action that queues a compaction of a random region from the table. 
+ */ +public class CompactRandomRegionOfTableAction extends Action { + private final byte[] tableNameBytes; + private final int majorRatio; + private final long sleepTime; + private final String tableName; + + public CompactRandomRegionOfTableAction( + String tableName, float majorRatio) { + this(-1, tableName, majorRatio); + } + + public CompactRandomRegionOfTableAction( + int sleepTime, String tableName, float majorRatio) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.majorRatio = (int) (100 * majorRatio); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + List regions = admin.getTableRegions(tableNameBytes); + boolean major = RandomUtils.nextInt(100) < majorRatio; + + LOG.info("Performing action: Compact random region of table " + + tableName + ", major=" + major); + HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( + regions.toArray(new HRegionInfo[regions.size()])); + + if (major) { + LOG.debug("Major compacting region " + region.getRegionNameAsString()); + admin.majorCompact(region.getRegionName()); + } else { + LOG.debug("Compacting region " + region.getRegionNameAsString()); + admin.compact(region.getRegionName()); + } + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java new file mode 100644 index 0000000..0fae29b --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/CompactTableAction.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** +* Action that queues a compaction of a table. +*/ +public class CompactTableAction extends Action { + private final byte[] tableNameBytes; + private final int majorRatio; + private final long sleepTime; + private final String tableName; + + public CompactTableAction(String tableName, float majorRatio) { + this(-1, tableName, majorRatio); + } + + public CompactTableAction( + int sleepTime, String tableName, float majorRatio) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.majorRatio = (int) (100 * majorRatio); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + boolean major = RandomUtils.nextInt(100) < majorRatio; + + LOG.info("Performing action: Compact table " + tableName + ", major=" + major); + if (major) { + admin.majorCompact(tableNameBytes); + } else { + admin.compact(tableNameBytes); + } + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git 
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java new file mode 100644 index 0000000..3d1689c --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushRandomRegionOfTableAction.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.util.List; + +/** +* Action that tries to flush a random region of a table. 
+*/ +public class FlushRandomRegionOfTableAction extends Action { + private final byte[] tableNameBytes; + private final long sleepTime; + private final String tableName; + + public FlushRandomRegionOfTableAction(String tableName) { + this (-1, tableName); + } + + public FlushRandomRegionOfTableAction(int sleepTime, String tableName) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Flush random region of table " + tableName); + List regions = admin.getTableRegions(tableNameBytes); + HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( + regions.toArray(new HRegionInfo[regions.size()])); + LOG.debug("Flushing region " + region.getRegionNameAsString()); + admin.flush(region.getRegionName()); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java new file mode 100644 index 0000000..5e6ec1d --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/FlushTableAction.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +/** +* Action that tries to flush a table. +*/ +public class FlushTableAction extends Action { + private final byte[] tableNameBytes; + private final long sleepTime; + private final String tableName; + + public FlushTableAction(String tableName) { + this(-1, tableName); + } + + public FlushTableAction(int sleepTime, String tableName) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Flush table " + tableName); + admin.flush(tableNameBytes); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java new file mode 100644 index 0000000..9909c6e --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/ForceBalancerAction.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +/** +* Action that tries to force a balancer run. +*/ +public class ForceBalancerAction extends Action { + @Override + public void perform() throws Exception { + LOG.info("Balancing regions"); + forceBalancer(); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java new file mode 100644 index 0000000..e567526 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MergeRandomAdjacentRegionsOfTableAction.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.util.List; + +/** + * Action to merge regions of a table. + */ +public class MergeRandomAdjacentRegionsOfTableAction extends Action { + private final byte[] tableNameBytes; + private final String tableName; + private final long sleepTime; + + public MergeRandomAdjacentRegionsOfTableAction(String tableName) { + this(-1, tableName); + } + + public MergeRandomAdjacentRegionsOfTableAction(int sleepTime, String tableName) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.tableName = tableName; + this.sleepTime = sleepTime; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Merge random adjacent regions of table " + tableName); + List regions = admin.getTableRegions(tableNameBytes); + if (regions.size() < 2) { + LOG.info("Table " + tableName + " doesn't have enough region to merge"); + return; + } + + int i = RandomUtils.nextInt(regions.size() - 1); + HRegionInfo a = regions.get(i++); + HRegionInfo b = regions.get(i); + LOG.debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString()); + admin.mergeRegions(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java new file mode 100644 index 0000000..201240f 
--- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRandomRegionOfTableAction.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.util.List; + +/** +* Action that tries to move a random region of a table. 
+*/ +public class MoveRandomRegionOfTableAction extends Action { + private final long sleepTime; + private final byte[] tableNameBytes; + private final String tableName; + + public MoveRandomRegionOfTableAction(String tableName) { + this(-1, tableName); + } + + public MoveRandomRegionOfTableAction(long sleepTime, String tableName) { + this.sleepTime = sleepTime; + this.tableNameBytes = Bytes.toBytes(tableName); + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Move random region of table " + tableName); + List regions = admin.getTableRegions(tableNameBytes); + HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( + regions.toArray(new HRegionInfo[regions.size()])); + LOG.debug("Unassigning region " + region.getRegionNameAsString()); + admin.unassign(region.getRegionName(), false); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java new file mode 100644 index 0000000..b0e3703 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/MoveRegionsOfTableAction.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.util.Collection; +import java.util.List; + +/** +* Action that tries to move every region of a table. +*/ +public class MoveRegionsOfTableAction extends Action { + private final long sleepTime; + private final byte[] tableNameBytes; + private final String tableName; + + public MoveRegionsOfTableAction(String tableName) { + this(-1, tableName); + } + + public MoveRegionsOfTableAction(long sleepTime, String tableName) { + this.sleepTime = sleepTime; + this.tableNameBytes = Bytes.toBytes(tableName); + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + + List regions = admin.getTableRegions(tableNameBytes); + Collection serversList = admin.getClusterStatus().getServers(); + ServerName[] servers = serversList.toArray(new ServerName[serversList.size()]); + + LOG.info("Performing action: Move regions of table " + tableName); + for (HRegionInfo regionInfo:regions) { + try { + String destServerName = + servers[RandomUtils.nextInt(servers.length)].getServerName(); + LOG.debug("Moving " + regionInfo.getRegionNameAsString() + " to " + destServerName); + admin.move(regionInfo.getRegionName(), Bytes.toBytes(destServerName)); + } catch 
(Exception e) { + LOG.debug("Error moving region", e); + } + } + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java new file mode 100644 index 0000000..a437745 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RemoveColumnAction.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.IOException; +import java.util.List; +import java.util.Random; +import java.util.Set; + +/** + * Action that removes a column family. 
+ */ +public class RemoveColumnAction extends Action { + private final byte[] tableName; + private final Set protectedColumns; + private HBaseAdmin admin; + private Random random; + + public RemoveColumnAction(String tableName, Set protectedColumns) { + this.tableName = Bytes.toBytes(tableName); + this.protectedColumns = protectedColumns; + random = new Random(); + } + + @Override + public void init(ActionContext context) throws IOException { + super.init(context); + this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); + } + + @Override + public void perform() throws Exception { + HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); + HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); + + if (columnDescriptors.length <= 1) { + return; + } + + int index = random.nextInt(columnDescriptors.length); + while(protectedColumns.contains(columnDescriptors[index].getNameAsString())) { + index = random.nextInt(columnDescriptors.length); + } + + tableDescriptor.removeFamily(columnDescriptors[index].getName()); + + admin.modifyTable(tableName, tableDescriptor); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java new file mode 100644 index 0000000..830dcca --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActionBaseAction.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.util.Threads; + +import java.io.IOException; + +/** +* Base class for restarting HBaseServer's +*/ +public class RestartActionBaseAction extends Action { + long sleepTime; // how long should we sleep + + public RestartActionBaseAction(long sleepTime) { + this.sleepTime = sleepTime; + } + + void sleep(long sleepTime) { + LOG.info("Sleeping for:" + sleepTime); + Threads.sleep(sleepTime); + } + + void restartMaster(ServerName server, long sleepTime) throws IOException { + sleepTime = Math.max(sleepTime, 1000); + killMaster(server); + sleep(sleepTime); + startMaster(server); + } + + void restartRs(ServerName server, long sleepTime) throws IOException { + sleepTime = Math.max(sleepTime, 1000); + killRs(server); + sleep(sleepTime); + startRs(server); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java new file mode 100644 index 0000000..a9bc23a --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartActiveMasterAction.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.ServerName; + +/** +* Action that tries to restart the active master. +*/ +public class RestartActiveMasterAction extends RestartActionBaseAction { + public RestartActiveMasterAction(long sleepTime) { + super(sleepTime); + } + @Override + public void perform() throws Exception { + LOG.info("Performing action: Restart active master"); + + ServerName master = cluster.getClusterStatus().getMaster(); + restartMaster(master, sleepTime); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java new file mode 100644 index 0000000..18cdf4a --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRandomRsAction.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; + +/** +* Action that restarts a random HRegionServer. +*/ +public class RestartRandomRsAction extends RestartActionBaseAction { + public RestartRandomRsAction(long sleepTime) { + super(sleepTime); + } + + @Override + public void perform() throws Exception { + LOG.info("Performing action: Restart random region server"); + ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); + + restartRs(server, sleepTime); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java new file mode 100644 index 0000000..10d5e14 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingMetaAction.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.ServerName; + +/** +* Action that tries to restart the HRegionServer holding Meta. +*/ +public class RestartRsHoldingMetaAction extends RestartActionBaseAction { + public RestartRsHoldingMetaAction(long sleepTime) { + super(sleepTime); + } + @Override + public void perform() throws Exception { + LOG.info("Performing action: Restart region server holding META"); + ServerName server = cluster.getServerHoldingMeta(); + if (server == null) { + LOG.warn("No server is holding .META. right now."); + return; + } + restartRs(server, sleepTime); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java new file mode 100644 index 0000000..1755ccd --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RestartRsHoldingTableAction.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.HTable; + +import java.io.IOException; +import java.util.Collection; + +/** +* Action that restarts an HRegionServer holding one of the regions of the table. +*/ +public class RestartRsHoldingTableAction extends RestartActionBaseAction { + + private final String tableName; + + public RestartRsHoldingTableAction(long sleepTime, String tableName) { + super(sleepTime); + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HTable table = null; + try { + Configuration conf = context.getHaseIntegrationTestingUtility().getConfiguration(); + table = new HTable(conf, tableName); + } catch (IOException e) { + LOG.debug("Error creating HTable used to get list of region locations.", e); + return; + } + + Collection serverNames = table.getRegionLocations().values(); + ServerName[] nameArray = serverNames.toArray(new ServerName[serverNames.size()]); + + restartRs(nameArray[RandomUtils.nextInt(nameArray.length)], sleepTime); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java new file mode 100644 index 0000000..eccc304 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/RollingBatchRestartRsAction.java @@ -0,0 +1,70 @@ 
+/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; + +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +/** + * Restarts a ratio of the regionservers in a rolling fashion. At each step, either kills a + * server, or starts one, sleeping randomly (0-sleepTime) in between steps. 
+ */ +public class RollingBatchRestartRsAction extends BatchRestartRsAction { + public RollingBatchRestartRsAction(long sleepTime, float ratio) { + super(sleepTime, ratio); + } + + @Override + public void perform() throws Exception { + LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", + (int)(ratio * 100))); + List selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), + ratio); + + Queue serversToBeKilled = new LinkedList(selectedServers); + Queue deadServers = new LinkedList(); + + // + while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) { + boolean action = true; //action true = kill server, false = start server + + if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) { + action = deadServers.isEmpty(); + } else { + action = RandomUtils.nextBoolean(); + } + + if (action) { + ServerName server = serversToBeKilled.remove(); + killRs(server); + deadServers.add(server); + } else { + ServerName server = deadServers.remove(); + startRs(server); + } + + sleep(RandomUtils.nextInt((int)sleepTime)); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java new file mode 100644 index 0000000..3071bd6 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SnapshotTableAction.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.client.HBaseAdmin; + +/** +* Action that tries to take a snapshot of a table. +*/ +public class SnapshotTableAction extends Action { + private final String tableName; + private final long sleepTime; + + public SnapshotTableAction(String tableName) { + this(-1, tableName); + } + + public SnapshotTableAction(int sleepTime, String tableName) { + this.tableName = tableName; + this.sleepTime = sleepTime; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + String snapshotName = tableName + "-it-" + System.currentTimeMillis(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Snapshot table " + tableName); + admin.snapshot(snapshotName, tableName); + if (sleepTime > 0) { + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java new file mode 100644 index 0000000..5d91a4d --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/SplitRandomRegionOfTableAction.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.util.Bytes; + +import java.util.List; + +/** +* Action that tries to split a random region of a table. +*/ +public class SplitRandomRegionOfTableAction extends Action { + private final byte[] tableNameBytes; + private final long sleepTime; + private final String tableName; + + public SplitRandomRegionOfTableAction(String tableName) { + this(-1, tableName); + } + + public SplitRandomRegionOfTableAction(int sleepTime, String tableName) { + this.tableNameBytes = Bytes.toBytes(tableName); + this.sleepTime = sleepTime; + this.tableName = tableName; + } + + @Override + public void perform() throws Exception { + HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); + HBaseAdmin admin = util.getHBaseAdmin(); + + LOG.info("Performing action: Split random region of table " + tableName); + List regions = admin.getTableRegions(tableNameBytes); + HRegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( + regions.toArray(new HRegionInfo[regions.size()])); + LOG.debug("Splitting region " + region.getRegionNameAsString()); + admin.split(region.getRegionName()); + if (sleepTime > 0) 
{ + Thread.sleep(sleepTime); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java new file mode 100644 index 0000000..a558213 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java @@ -0,0 +1,50 @@ +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.junit.Assert; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** This action is too specific to put in ChaosMonkey; put it here */ +public class UnbalanceKillAndRebalanceAction extends Action { + /** Fractions of servers to get regions and live and die respectively; from all other + * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */ + private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1; + private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1; + private static final double HOARD_FRC_OF_REGIONS = 0.8; + /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance + * and restarting the servers; to make sure these events have time to impact the cluster. 
*/ + private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000; + private static final long WAIT_FOR_KILLS_MS = 2 * 1000; + private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000; + + @Override + public void perform() throws Exception { + ClusterStatus status = this.cluster.getClusterStatus(); + List victimServers = new LinkedList(status.getServers()); + int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); + int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); + Assert.assertTrue((liveCount + deadCount) < victimServers.size()); + List targetServers = new ArrayList(liveCount); + for (int i = 0; i < liveCount + deadCount; ++i) { + int victimIx = RandomUtils.nextInt(victimServers.size()); + targetServers.add(victimServers.remove(victimIx)); + } + unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); + Thread.sleep(WAIT_FOR_UNBALANCE_MS); + for (int i = 0; i < liveCount; ++i) { + killRs(targetServers.get(i)); + } + Thread.sleep(WAIT_FOR_KILLS_MS); + forceBalancer(); + Thread.sleep(WAIT_AFTER_BALANCE_MS); + for (int i = 0; i < liveCount; ++i) { + startRs(targetServers.get(i)); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java new file mode 100644 index 0000000..276144d --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceRegionsAction.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.actions; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.ClusterStatus; +import org.apache.hadoop.hbase.ServerName; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** +* Action that tries to unbalance the regions of a cluster. +*/ +public class UnbalanceRegionsAction extends Action { + private double fractionOfRegions; + private double fractionOfServers; + + /** + * Unbalances the regions on the cluster by choosing "target" servers, and moving + * some regions from each of the non-target servers to random target servers. + * @param fractionOfRegions Fraction of regions to move from each server. + * @param fractionOfServers Fraction of servers to be chosen as targets. 
+ */ + public UnbalanceRegionsAction(double fractionOfRegions, double fractionOfServers) { + this.fractionOfRegions = fractionOfRegions; + this.fractionOfServers = fractionOfServers; + } + + @Override + public void perform() throws Exception { + LOG.info("Unbalancing regions"); + ClusterStatus status = this.cluster.getClusterStatus(); + List victimServers = new LinkedList(status.getServers()); + int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size()); + List targetServers = new ArrayList(targetServerCount); + for (int i = 0; i < targetServerCount; ++i) { + int victimIx = RandomUtils.nextInt(victimServers.size()); + targetServers.add(victimServers.remove(victimIx)); + } + unbalanceRegions(status, victimServers, targetServers, fractionOfRegions); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java new file mode 100644 index 0000000..d93c9eb --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/CalmMonkeyFactory.java @@ -0,0 +1,15 @@ +package org.apache.hadoop.hbase.chaos.factories; + +import org.apache.hadoop.hbase.chaos.monkies.CalmChaosMonkey; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; + + +/** + * Factory to create a calm ChaosMonkey. 
+ */ +public class CalmMonkeyFactory extends MonkeyFactory { + @Override + public ChaosMonkey build() { + return new CalmChaosMonkey(); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java new file mode 100644 index 0000000..2164c19 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java @@ -0,0 +1,49 @@ +package org.apache.hadoop.hbase.chaos.factories; + +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; + +import java.util.Map; +import java.util.Set; + +/** + * Base class of the factory that will create a ChaosMonkey. + */ +public abstract class MonkeyFactory { + + protected String tableName; + protected Set columnFamilies; + protected IntegrationTestingUtility util; + + public MonkeyFactory setTableName(String tableName) { + this.tableName = tableName; + return this; + } + + public MonkeyFactory setColumnFamilies(Set columnFamilies) { + this.columnFamilies = columnFamilies; + return this; + } + + public MonkeyFactory setUtil(IntegrationTestingUtility util) { + this.util = util; + return this; + } + + public abstract ChaosMonkey build(); + + + public static Map FACTORIES = ImmutableMap.of( + "calm", new CalmMonkeyFactory(), + "slowDeterministic", new SlowDeterministicMonkeyFactory() + ); + + public static MonkeyFactory getFactory(String factoryName) { + MonkeyFactory fact = FACTORIES.get(factoryName); + if (fact == null) { + fact = FACTORIES.get("slowDeterministic"); + } + return fact; + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java new file mode 100644 index 0000000..20a31ad --- /dev/null +++ 
hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java @@ -0,0 +1,73 @@ +package org.apache.hadoop.hbase.chaos.factories; + +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.hbase.chaos.actions.AddColumnAction; +import org.apache.hadoop.hbase.chaos.actions.BatchRestartRsAction; +import org.apache.hadoop.hbase.chaos.actions.ChangeEncodingAction; +import org.apache.hadoop.hbase.chaos.actions.ChangeVersionsAction; +import org.apache.hadoop.hbase.chaos.actions.CompactRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.CompactTableAction; +import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.FlushTableAction; +import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.RemoveColumnAction; +import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction; +import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsAction; +import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction; +import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction; +import org.apache.hadoop.hbase.chaos.actions.SnapshotTableAction; +import org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy; +import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy; +import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; + +public class SlowDeterministicMonkeyFactory extends MonkeyFactory { + @Override + public ChaosMonkey build() { + + // Actions such as 
compact/flush a table/region, + // move one region around. They are not so destructive, + // can be executed more frequently. + Action[] actions1 = new Action[] { + new CompactTableAction(tableName, 0.5f), + new CompactRandomRegionOfTableAction(tableName, 0.6f), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName), + new MoveRandomRegionOfTableAction(tableName) + }; + + // Actions such as split/merge/snapshot. + // They should not cause data loss, or unreliability + // such as region stuck in transition. + Action[] actions2 = new Action[] { + new SplitRandomRegionOfTableAction(tableName), + new MergeRandomAdjacentRegionsOfTableAction(tableName), + new SnapshotTableAction(tableName), + new AddColumnAction(tableName), + new RemoveColumnAction(tableName, columnFamilies), + new ChangeEncodingAction(tableName), + new ChangeVersionsAction(tableName) + }; + + // Destructive actions to mess things around. + Action[] actions3 = new Action[] { + new MoveRegionsOfTableAction(tableName), + new RestartRandomRsAction(60000), + new BatchRestartRsAction(5000, 0.5f), + new RestartActiveMasterAction(5000), + new RollingBatchRestartRsAction(5000, 1.0f), + new RestartRsHoldingMetaAction(35000) + }; + + return new PolicyBasedChaosMonkey(util, + new PeriodicRandomActionPolicy(60 * 1000, actions1), + new PeriodicRandomActionPolicy(90 * 1000, actions2), + new CompositeSequentialPolicy( + new DoActionsOncePolicy(150 * 1000, actions3), + new PeriodicRandomActionPolicy(150 * 1000, actions3))); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java new file mode 100644 index 0000000..8aa858a --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/CalmChaosMonkey.java @@ -0,0 +1,27 @@ +package org.apache.hadoop.hbase.chaos.monkies; + + +/** + * Chaos Monkey that does nothing. 
+ */ +public class CalmChaosMonkey extends ChaosMonkey { + @Override + public void start() throws Exception { + + } + + @Override + public void stop(String why) { + + } + + @Override + public boolean isStopped() { + return false; + } + + @Override + public void waitForStop() throws InterruptedException { + + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java new file mode 100644 index 0000000..74c5bd8 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/ChaosMonkey.java @@ -0,0 +1,16 @@ +package org.apache.hadoop.hbase.chaos.monkies; + +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.util.Tool; + +public abstract class ChaosMonkey implements Stoppable { + public abstract void start() throws Exception; + + @Override + public abstract void stop(String why); + + @Override + public abstract boolean isStopped(); + + public abstract void waitForStop() throws InterruptedException; +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java new file mode 100644 index 0000000..eb9219b --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.monkies; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.lang.math.RandomUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.chaos.policies.Policy; +import org.apache.hadoop.hbase.util.Pair; + +/** + * A utility that injects faults in a running cluster. + *

+ * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like + * - Select a random server to kill + * - Sleep for 5 sec + * - Start the server on the same host + * Actions can also be complex events, like rolling restart of all of the servers. + *

+ * Policies on the other hand are responsible for executing the actions based on a strategy. + * The default policy is to execute a random action every minute based on predefined action + * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one + * policy can be active at any time. + *

+ * Chaos monkey can be run from the command line, or can be invoked from integration tests. + * See {@link IntegrationTestDataIngestWithChaosMonkey} or other integration tests that use + * chaos monkey for code examples. + *

+ * ChaosMonkey class is indeed inspired by the Netflix's same-named tool: + * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html + */ +public class PolicyBasedChaosMonkey extends ChaosMonkey { + + private static final Log LOG = LogFactory.getLog(PolicyBasedChaosMonkey.class); + private static final long ONE_SEC = 1000; + private static final long FIVE_SEC = 5 * ONE_SEC; + private static final long ONE_MIN = 60 * ONE_SEC; + + public static final long TIMEOUT = ONE_MIN; + + final IntegrationTestingUtility util; + + /** + * Construct a new ChaosMonkey + * @param util the HBaseIntegrationTestingUtility already configured + * @param policies custom policies to use + */ + public PolicyBasedChaosMonkey(IntegrationTestingUtility util, Policy... policies) { + this.util = util; + this.policies = policies; + } + + public PolicyBasedChaosMonkey(IntegrationTestingUtility util, Collection policies) { + this.util = util; + this.policies = policies.toArray(new Policy[policies.size()]); + } + + + /** Selects a random item from the given items */ + public static T selectRandomItem(T[] items) { + return items[RandomUtils.nextInt(items.length)]; + } + + /** Selects a random item from the given items with weights*/ + public static T selectWeightedRandomItem(List> items) { + int totalWeight = 0; + for (Pair pair : items) { + totalWeight += pair.getSecond(); + } + + int cutoff = RandomUtils.nextInt(totalWeight); + int cummulative = 0; + T item = null; + + //warn: O(n) + for (int i=0; i List selectRandomItems(T[] items, float ratio) { + int remaining = (int)Math.ceil(items.length * ratio); + + List selectedItems = new ArrayList(remaining); + + for (int i=0; i 0; i++) { + if (RandomUtils.nextFloat() < ((float)remaining/(items.length-i))) { + selectedItems.add(items[i]); + remaining--; + } + } + + return selectedItems; + } + + private Policy[] policies; + private Thread[] monkeyThreads; + + @Override + public void start() throws Exception { + monkeyThreads = new 
Thread[policies.length]; + + for (int i=0; i policies; + public CompositeSequentialPolicy(Policy... policies) { + this.policies = Arrays.asList(policies); + } + + @Override + public void stop(String why) { + super.stop(why); + for (Policy p : policies) { + p.stop(why); + } + } + + @Override + public void run() { + for (Policy p : policies) { + p.run(); + } + } + + @Override + public void init(PolicyContext context) throws Exception { + super.init(context); + for (Policy p : policies) { + p.init(context); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java new file mode 100644 index 0000000..972ba0c --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/DoActionsOncePolicy.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.policies; + +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.util.StringUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** A policy which performs a sequence of actions deterministically. 
*/ +public class DoActionsOncePolicy extends PeriodicPolicy { + private List actions; + + public DoActionsOncePolicy(long periodMs, List actions) { + super(periodMs); + this.actions = new ArrayList(actions); + } + + public DoActionsOncePolicy(long periodMs, Action... actions) { + this(periodMs, Arrays.asList(actions)); + } + + @Override + protected void runOneIteration() { + if (actions.isEmpty()) { + this.stop("done"); + return; + } + Action action = actions.remove(0); + + try { + action.perform(); + } catch (Exception ex) { + LOG.warn("Exception occured during performing action: " + + StringUtils.stringifyException(ex)); + } + } + + @Override + public void init(PolicyContext context) throws Exception { + super.init(context); + for (Action action : actions) { + action.init(this.context); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java new file mode 100644 index 0000000..b7b6109 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicPolicy.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.chaos.policies; + +import org.apache.commons.lang.math.RandomUtils; +import org.apache.hadoop.hbase.util.Threads; + +/** A policy which does stuff every time interval. */ +public abstract class PeriodicPolicy extends Policy { + private long periodMs; + + public PeriodicPolicy(long periodMs) { + this.periodMs = periodMs; + } + + @Override + public void run() { + // Add some jitter. + int jitter = RandomUtils.nextInt((int) periodMs); + LOG.info("Sleeping for " + jitter + " to add jitter"); + Threads.sleep(jitter); + + while (!isStopped()) { + long start = System.currentTimeMillis(); + runOneIteration(); + + if (isStopped()) return; + long sleepTime = periodMs - (System.currentTimeMillis() - start); + if (sleepTime > 0) { + LOG.info("Sleeping for: " + sleepTime); + Threads.sleep(sleepTime); + } + } + } + + protected abstract void runOneIteration(); + + @Override + public void init(PolicyContext context) throws Exception { + super.init(context); + LOG.info("Using ChaosMonkey Policy: " + this.getClass() + ", period: " + periodMs); + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java new file mode 100644 index 0000000..89e1c40 --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/PeriodicRandomActionPolicy.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.chaos.policies; + +import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.util.StringUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A policy, which picks a random action according to the given weights, + * and performs it every configurable period. + */ +public class PeriodicRandomActionPolicy extends PeriodicPolicy { + private List> actions; + + public PeriodicRandomActionPolicy(long periodMs, List> actions) { + super(periodMs); + this.actions = actions; + } + + public PeriodicRandomActionPolicy(long periodMs, Pair... actions) { + // We don't expect it to be modified. + this(periodMs, Arrays.asList(actions)); + } + + public PeriodicRandomActionPolicy(long periodMs, Action... 
actions) { + super(periodMs); + this.actions = new ArrayList>(actions.length); + for (Action action : actions) { + this.actions.add(new Pair(action, 1)); + } + } + + @Override + protected void runOneIteration() { + Action action = PolicyBasedChaosMonkey.selectWeightedRandomItem(actions); + try { + action.perform(); + } catch (Exception ex) { + LOG.warn("Exception occured during performing action: " + + StringUtils.stringifyException(ex)); + } + } + + @Override + public void init(PolicyContext context) throws Exception { + super.init(context); + for (Pair action : actions) { + action.getFirst().init(this.context); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java new file mode 100644 index 0000000..affaeda --- /dev/null +++ hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/policies/Policy.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.chaos.policies; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.IntegrationTestingUtility; +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.hbase.util.StoppableImplementation; + +/** + * A policy to introduce chaos to the cluster + */ +public abstract class Policy extends StoppableImplementation implements Runnable { + + protected static Log LOG = LogFactory.getLog(Policy.class); + + protected PolicyContext context; + + public void init(PolicyContext context) throws Exception { + this.context = context; + } + + /** + * A context for a Policy + */ + public static class PolicyContext extends Action.ActionContext { + public PolicyContext(IntegrationTestingUtility util) { + super(util); + } + } +} diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java index a4feca5..ccfbf0c 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/mttr/IntegrationTestMTTR.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.IntegrationTests; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; @@ -36,9 +37,12 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.KeyOnlyFilter; +import org.apache.hadoop.hbase.chaos.actions.Action; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ChaosMonkey; 
import org.apache.hadoop.hbase.util.LoadTestTool; +import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingMetaAction; +import org.apache.hadoop.hbase.chaos.actions.RestartRsHoldingTableAction; import org.cloudera.htrace.Sampler; import org.cloudera.htrace.Span; import org.cloudera.htrace.Trace; @@ -124,10 +128,10 @@ public class IntegrationTestMTTR { /** * All of the chaos monkey actions used. */ - private static ChaosMonkey.Action restartRSAction; - private static ChaosMonkey.Action restartMetaAction; - private static ChaosMonkey.Action moveRegionAction; - private static ChaosMonkey.Action restartMasterAction; + private static Action restartRSAction; + private static Action restartMetaAction; + private static Action moveRegionAction; + private static Action restartMasterAction; /** * The load test tool used to create load and make sure that HLogs aren't empty. @@ -163,19 +167,19 @@ public class IntegrationTestMTTR { private static void setupActions() throws IOException { // Set up the action that will restart a region server holding a region from our table // because this table should only have one region we should be good. - restartRSAction = new ChaosMonkey.RestartRsHoldingTable(SLEEP_TIME, tableName.getNameAsString()); + restartRSAction = new RestartRsHoldingTableAction(SLEEP_TIME, tableName.getNameAsString()); // Set up the action that will kill the region holding meta. - restartMetaAction = new ChaosMonkey.RestartRsHoldingMeta(SLEEP_TIME); + restartMetaAction = new RestartRsHoldingMetaAction(SLEEP_TIME); // Set up the action that will move the regions of our table. 
- moveRegionAction = new ChaosMonkey.MoveRegionsOfTable(SLEEP_TIME, tableName.getNameAsString()); + moveRegionAction = new MoveRegionsOfTableAction(SLEEP_TIME, tableName.getNameAsString()); // Kill the master - restartMasterAction = new ChaosMonkey.RestartActiveMaster(1000); + restartMasterAction = new RestartActiveMasterAction(1000); // Give the action the access to the cluster. - ChaosMonkey.ActionContext actionContext = new ChaosMonkey.ActionContext(util); + Action.ActionContext actionContext = new Action.ActionContext(util); restartRSAction.init(actionContext); restartMetaAction.init(actionContext); moveRegionAction.init(actionContext); @@ -235,7 +239,7 @@ public class IntegrationTestMTTR { @Test public void testRestartRsHoldingTable() throws Exception { - run(new ActionCallable(restartRSAction), "RestartRsHoldingTable"); + run(new ActionCallable(restartRSAction), "RestartRsHoldingTableAction"); } @Test @@ -476,9 +480,9 @@ public class IntegrationTestMTTR { public class ActionCallable implements Callable { - private final ChaosMonkey.Action action; + private final Action action; - public ActionCallable(ChaosMonkey.Action action) { + public ActionCallable(Action action) { this.action = action; } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java index bb7cdb4..9b6167d 100644 --- hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java +++ hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestBigLinkedListWithChaosMonkey.java @@ -24,8 +24,23 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.IntegrationTests; -import org.apache.hadoop.hbase.util.ChaosMonkey; -import org.apache.hadoop.hbase.util.ChaosMonkey.*; +import 
org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.apache.hadoop.hbase.chaos.actions.AddColumnAction; +import org.apache.hadoop.hbase.chaos.actions.ChangeEncodingAction; +import org.apache.hadoop.hbase.chaos.actions.ChangeVersionsAction; +import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; +import org.apache.hadoop.hbase.chaos.actions.CompactRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.CompactTableAction; +import org.apache.hadoop.hbase.chaos.actions.FlushRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.FlushTableAction; +import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.MoveRegionsOfTableAction; +import org.apache.hadoop.hbase.chaos.actions.RemoveColumnAction; +import org.apache.hadoop.hbase.chaos.actions.SnapshotTableAction; +import org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction; +import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; import org.apache.hadoop.util.ToolRunner; import org.junit.After; import org.junit.Before; @@ -69,31 +84,31 @@ public class IntegrationTestBigLinkedListWithChaosMonkey extends IntegrationTest // move one region around. They are not so destructive, // can be executed more frequently. Action[] actions1 = new Action[] { - new CompactTable(tableName, 0.5f), - new CompactRandomRegionOfTable(tableName, 0.6f), - new FlushTable(tableName), - new FlushRandomRegionOfTable(tableName), - new MoveRandomRegionOfTable(tableName) + new CompactTableAction(tableName, 0.5f), + new CompactRandomRegionOfTableAction(tableName, 0.6f), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName), + new MoveRandomRegionOfTableAction(tableName) }; // Actions such as split/merge/snapshot. 
// They should not cause data loss, or unreliability // such as region stuck in transition. Action[] actions2 = new Action[] { - new SplitRandomRegionOfTable(tableName), - new MergeRandomAdjacentRegionsOfTable(tableName), - new SnapshotTable(tableName), - new MoveRegionsOfTable(tableName), + new SplitRandomRegionOfTableAction(tableName), + new MergeRandomAdjacentRegionsOfTableAction(tableName), + new SnapshotTableAction(tableName), + new MoveRegionsOfTableAction(tableName), new AddColumnAction(tableName), new RemoveColumnAction(tableName), new ChangeEncodingAction(tableName), new ChangeVersionsAction(tableName) }; - monkey = new ChaosMonkey(util, + monkey = new PolicyBasedChaosMonkey(util, new PeriodicRandomActionPolicy(30 * 1000, actions1), new PeriodicRandomActionPolicy(60 * 1000, actions2), - ChaosMonkey.getPolicyByName(ChaosMonkey.EVERY_MINUTE_RANDOM_ACTION_POLICY)); + PolicyBasedChaosMonkey.getPolicyByName(PolicyBasedChaosMonkey.EVERY_MINUTE_RANDOM_ACTION_POLICY)); LOG.info("Chaos Monkey Starting"); monkey.start(); } diff --git hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java deleted file mode 100644 index cafceb7..0000000 --- hbase-it/src/test/java/org/apache/hadoop/hbase/util/ChaosMonkey.java +++ /dev/null @@ -1,1213 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.util; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.Random; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.lang.RandomStringUtils; -import org.apache.commons.lang.math.RandomUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.HBaseCluster; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey; -import org.apache.hadoop.hbase.IntegrationTestingUtility; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.Stoppable; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.ToolRunner; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -/** - * A utility to injects faults in a running cluster. - *

- * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like - * - Select a random server to kill - * - Sleep for 5 sec - * - Start the server on the same host - * Actions can also be complex events, like rolling restart of all of the servers. - *

- * Policies on the other hand are responsible for executing the actions based on a strategy. - * The default policy is to execute a random action every minute based on predefined action - * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one - * policy can be active at any time. - *

- * Chaos monkey can be run from the command line, or can be invoked from integration tests. - * See {@link IntegrationTestDataIngestWithChaosMonkey} or other integration tests that use - * chaos monkey for code examples. - *

- * ChaosMonkey class is indeed inspired by the Netflix's same-named tool: - * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html - */ -public class ChaosMonkey extends AbstractHBaseTool implements Stoppable { - - private static final Log LOG = LogFactory.getLog(ChaosMonkey.class); - - private static final long ONE_SEC = 1000; - private static final long FIVE_SEC = 5 * ONE_SEC; - private static final long ONE_MIN = 60 * ONE_SEC; - private static final long TIMEOUT = ONE_MIN; - - final IntegrationTestingUtility util; - - /** - * Construct a new ChaosMonkey - * @param util the HBaseIntegrationTestingUtility already configured - * @param policies names of pre-defined policies to use - */ - public ChaosMonkey(IntegrationTestingUtility util, String... policies) { - this.util = util; - setPoliciesByName(policies); - } - - /** - * Construct a new ChaosMonkey - * @param util the HBaseIntegrationTestingUtility already configured - * @param policies custom policies to use - */ - public ChaosMonkey(IntegrationTestingUtility util, Policy... policies) { - this.util = util; - this.policies = policies; - } - - private void setPoliciesByName(String... policies) { - this.policies = new Policy[policies.length]; - for (int i=0; i < policies.length; i++) { - this.policies[i] = getPolicyByName(policies[i]); - } - } - - public static Policy getPolicyByName(String policy) { - return NAMED_POLICIES.get(policy); - } - - /** - * Context for Action's - */ - public static class ActionContext { - private IntegrationTestingUtility util; - - public ActionContext(IntegrationTestingUtility util) { - this.util = util; - } - - public IntegrationTestingUtility getHaseIntegrationTestingUtility() { - return util; - } - - public HBaseCluster getHBaseCluster() { - return util.getHBaseClusterInterface(); - } - } - - /** - * A (possibly mischievous) action that the ChaosMonkey can perform. 
- */ - public static class Action { - // TODO: interesting question - should actions be implemented inside - // ChaosMonkey, or outside? If they are inside (initial), the class becomes - // huge and all-encompassing; if they are outside ChaosMonkey becomes just - // a random task scheduler. For now, keep inside. - - protected ActionContext context; - protected HBaseCluster cluster; - protected ClusterStatus initialStatus; - protected ServerName[] initialServers; - - public void init(ActionContext context) throws IOException { - this.context = context; - cluster = context.getHBaseCluster(); - initialStatus = cluster.getInitialClusterStatus(); - Collection regionServers = initialStatus.getServers(); - initialServers = regionServers.toArray(new ServerName[regionServers.size()]); - } - - public void perform() throws Exception { }; - - // TODO: perhaps these methods should be elsewhere? - /** Returns current region servers */ - protected ServerName[] getCurrentServers() throws IOException { - Collection regionServers = cluster.getClusterStatus().getServers(); - if (regionServers == null || regionServers.size() <= 0) return new ServerName [] {}; - return regionServers.toArray(new ServerName[regionServers.size()]); - } - - protected void killMaster(ServerName server) throws IOException { - LOG.info("Killing master:" + server); - cluster.killMaster(server); - cluster.waitForMasterToStop(server, TIMEOUT); - LOG.info("Killed master server:" + server); - } - - protected void startMaster(ServerName server) throws IOException { - LOG.info("Starting master:" + server.getHostname()); - cluster.startMaster(server.getHostname()); - cluster.waitForActiveAndReadyMaster(TIMEOUT); - LOG.info("Started master: " + server); - } - - protected void killRs(ServerName server) throws IOException { - LOG.info("Killing region server:" + server); - cluster.killRegionServer(server); - cluster.waitForRegionServerToStop(server, TIMEOUT); - LOG.info("Killed region server:" + server + ". 
Reported num of rs:" - + cluster.getClusterStatus().getServersSize()); - } - - protected void startRs(ServerName server) throws IOException { - LOG.info("Starting region server:" + server.getHostname()); - cluster.startRegionServer(server.getHostname()); - cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT); - LOG.info("Started region server:" + server + ". Reported num of rs:" - + cluster.getClusterStatus().getServersSize()); - } - - protected void unbalanceRegions(ClusterStatus clusterStatus, - List fromServers, List toServers, - double fractionOfRegions) throws Exception { - List victimRegions = new LinkedList(); - for (ServerName server : fromServers) { - ServerLoad serverLoad = clusterStatus.getLoad(server); - // Ugh. - List regions = new LinkedList(serverLoad.getRegionsLoad().keySet()); - int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); - LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName()); - for (int i = 0; i < victimRegionCount; ++i) { - int victimIx = RandomUtils.nextInt(regions.size()); - String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); - victimRegions.add(Bytes.toBytes(regionId)); - } - } - - LOG.info("Moving " + victimRegions.size() + " regions from " + fromServers.size() - + " servers to " + toServers.size() + " different servers"); - HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - for (byte[] victimRegion : victimRegions) { - int targetIx = RandomUtils.nextInt(toServers.size()); - admin.move(victimRegion, Bytes.toBytes(toServers.get(targetIx).getServerName())); - } - } - - protected void forceBalancer() throws Exception { - HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - boolean result = admin.balancer(); - if (!result) { - LOG.error("Balancer didn't succeed"); - } - } - } - - private static class RestartActionBase extends Action { - long sleepTime; // how long should we sleep 
- - public RestartActionBase(long sleepTime) { - this.sleepTime = sleepTime; - } - - void sleep(long sleepTime) { - LOG.info("Sleeping for:" + sleepTime); - Threads.sleep(sleepTime); - } - - void restartMaster(ServerName server, long sleepTime) throws IOException { - sleepTime = Math.max(sleepTime, 1000); - killMaster(server); - sleep(sleepTime); - startMaster(server); - } - - void restartRs(ServerName server, long sleepTime) throws IOException { - sleepTime = Math.max(sleepTime, 1000); - killRs(server); - sleep(sleepTime); - startRs(server); - } - } - - public static class RestartActiveMaster extends RestartActionBase { - public RestartActiveMaster(long sleepTime) { - super(sleepTime); - } - @Override - public void perform() throws Exception { - LOG.info("Performing action: Restart active master"); - - ServerName master = cluster.getClusterStatus().getMaster(); - restartMaster(master, sleepTime); - } - } - - public static class RestartRandomRs extends RestartActionBase { - public RestartRandomRs(long sleepTime) { - super(sleepTime); - } - - @Override - public void perform() throws Exception { - LOG.info("Performing action: Restart random region server"); - ServerName server = selectRandomItem(getCurrentServers()); - - restartRs(server, sleepTime); - } - } - - public static class RestartRsHoldingMeta extends RestartActionBase { - public RestartRsHoldingMeta(long sleepTime) { - super(sleepTime); - } - @Override - public void perform() throws Exception { - LOG.info("Performing action: Restart region server holding META"); - ServerName server = cluster.getServerHoldingMeta(); - if (server == null) { - LOG.warn("No server is holding .META. 
right now."); - return; - } - restartRs(server, sleepTime); - } - } - - public static class RestartRsHoldingTable extends RestartActionBase { - - private final String tableName; - - public RestartRsHoldingTable(long sleepTime, String tableName) { - super(sleepTime); - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HTable table = null; - try { - Configuration conf = context.getHaseIntegrationTestingUtility().getConfiguration(); - table = new HTable(conf, tableName); - } catch (IOException e) { - LOG.debug("Error creating HTable used to get list of region locations.", e); - return; - } - - Collection serverNames = table.getRegionLocations().values(); - ServerName[] nameArray = serverNames.toArray(new ServerName[serverNames.size()]); - - restartRs(nameArray[RandomUtils.nextInt(nameArray.length)], sleepTime); - } - } - - public static class MoveRegionsOfTable extends Action { - private final long sleepTime; - private final byte[] tableNameBytes; - private final String tableName; - - public MoveRegionsOfTable(String tableName) { - this(-1, tableName); - } - - public MoveRegionsOfTable(long sleepTime, String tableName) { - this.sleepTime = sleepTime; - this.tableNameBytes = Bytes.toBytes(tableName); - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - - List regions = admin.getTableRegions(tableNameBytes); - Collection serversList = admin.getClusterStatus().getServers(); - ServerName[] servers = serversList.toArray(new ServerName[serversList.size()]); - - LOG.info("Performing action: Move regions of table " + tableName); - for (HRegionInfo regionInfo:regions) { - try { - String destServerName = - servers[RandomUtils.nextInt(servers.length)].getServerName(); - LOG.debug("Moving " + regionInfo.getRegionNameAsString() + " to " + destServerName); - admin.move(regionInfo.getRegionName(), 
Bytes.toBytes(destServerName)); - } catch (Exception e) { - LOG.debug("Error moving region", e); - } - } - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class MoveRandomRegionOfTable extends Action { - private final long sleepTime; - private final byte[] tableNameBytes; - private final String tableName; - - public MoveRandomRegionOfTable(String tableName) { - this(-1, tableName); - } - - public MoveRandomRegionOfTable(long sleepTime, String tableName) { - this.sleepTime = sleepTime; - this.tableNameBytes = Bytes.toBytes(tableName); - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Move random region of table " + tableName); - List regions = admin.getTableRegions(tableNameBytes); - HRegionInfo region = selectRandomItem( - regions.toArray(new HRegionInfo[regions.size()])); - LOG.debug("Unassigning region " + region.getRegionNameAsString()); - admin.unassign(region.getRegionName(), false); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class SplitRandomRegionOfTable extends Action { - private final byte[] tableNameBytes; - private final long sleepTime; - private final String tableName; - - public SplitRandomRegionOfTable(String tableName) { - this(-1, tableName); - } - - public SplitRandomRegionOfTable(int sleepTime, String tableName) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.sleepTime = sleepTime; - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Split random region of table " + tableName); - List regions = admin.getTableRegions(tableNameBytes); - HRegionInfo region = selectRandomItem( - regions.toArray(new 
HRegionInfo[regions.size()])); - LOG.debug("Splitting region " + region.getRegionNameAsString()); - admin.split(region.getRegionName()); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class MergeRandomAdjacentRegionsOfTable extends Action { - private final byte[] tableNameBytes; - private final String tableName; - private final long sleepTime; - - public MergeRandomAdjacentRegionsOfTable(String tableName) { - this(-1, tableName); - } - - public MergeRandomAdjacentRegionsOfTable(int sleepTime, String tableName) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.tableName = tableName; - this.sleepTime = sleepTime; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Merge random adjacent regions of table " + tableName); - List regions = admin.getTableRegions(tableNameBytes); - if (regions.size() < 2) { - LOG.info("Table " + tableName + " doesn't have enough region to merge"); - return; - } - - int i = RandomUtils.nextInt(regions.size() - 1); - HRegionInfo a = regions.get(i++); - HRegionInfo b = regions.get(i); - LOG.debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString()); - admin.mergeRegions(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class CompactTable extends Action { - private final byte[] tableNameBytes; - private final int majorRatio; - private final long sleepTime; - private final String tableName; - - public CompactTable( - String tableName, float majorRatio) { - this(-1, tableName, majorRatio); - } - - public CompactTable( - int sleepTime, String tableName, float majorRatio) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.majorRatio = (int) (100 * majorRatio); - this.sleepTime = sleepTime; - this.tableName = tableName; - } - - 
@Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - boolean major = RandomUtils.nextInt(100) < majorRatio; - - LOG.info("Performing action: Compact table " + tableName + ", major=" + major); - if (major) { - admin.majorCompact(tableNameBytes); - } else { - admin.compact(tableNameBytes); - } - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class CompactRandomRegionOfTable extends Action { - private final byte[] tableNameBytes; - private final int majorRatio; - private final long sleepTime; - private final String tableName; - - public CompactRandomRegionOfTable( - String tableName, float majorRatio) { - this(-1, tableName, majorRatio); - } - - public CompactRandomRegionOfTable( - int sleepTime, String tableName, float majorRatio) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.majorRatio = (int) (100 * majorRatio); - this.sleepTime = sleepTime; - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - List regions = admin.getTableRegions(tableNameBytes); - boolean major = RandomUtils.nextInt(100) < majorRatio; - - LOG.info("Performing action: Compact random region of table " - + tableName + ", major=" + major); - HRegionInfo region = selectRandomItem( - regions.toArray(new HRegionInfo[regions.size()])); - - if (major) { - LOG.debug("Major compacting region " + region.getRegionNameAsString()); - admin.majorCompact(region.getRegionName()); - } else { - LOG.debug("Compacting region " + region.getRegionNameAsString()); - admin.compact(region.getRegionName()); - } - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class FlushTable extends Action { - private final byte[] tableNameBytes; - private final long sleepTime; - private final 
String tableName; - - public FlushTable(String tableName) { - this(-1, tableName); - } - - public FlushTable(int sleepTime, String tableName) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.sleepTime = sleepTime; - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Flush table " + tableName); - admin.flush(tableNameBytes); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class FlushRandomRegionOfTable extends Action { - private final byte[] tableNameBytes; - private final long sleepTime; - private final String tableName; - - public FlushRandomRegionOfTable(String tableName) { - this (-1, tableName); - } - - public FlushRandomRegionOfTable(int sleepTime, String tableName) { - this.tableNameBytes = Bytes.toBytes(tableName); - this.sleepTime = sleepTime; - this.tableName = tableName; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = context.getHaseIntegrationTestingUtility(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Flush random region of table " + tableName); - List regions = admin.getTableRegions(tableNameBytes); - HRegionInfo region = selectRandomItem( - regions.toArray(new HRegionInfo[regions.size()])); - LOG.debug("Flushing region " + region.getRegionNameAsString()); - admin.flush(region.getRegionName()); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - public static class SnapshotTable extends Action { - private final String tableName; - private final long sleepTime; - - public SnapshotTable(String tableName) { - this(-1, tableName); - } - - public SnapshotTable(int sleepTime, String tableName) { - this.tableName = tableName; - this.sleepTime = sleepTime; - } - - @Override - public void perform() throws Exception { - HBaseTestingUtility util = 
context.getHaseIntegrationTestingUtility(); - String snapshotName = tableName + "-it-" + System.currentTimeMillis(); - HBaseAdmin admin = util.getHBaseAdmin(); - - LOG.info("Performing action: Snapshot table " + tableName); - admin.snapshot(snapshotName, tableName); - if (sleepTime > 0) { - Thread.sleep(sleepTime); - } - } - } - - - /** - * Restarts a ratio of the running regionservers at the same time - */ - public static class BatchRestartRs extends RestartActionBase { - float ratio; //ratio of regionservers to restart - - public BatchRestartRs(long sleepTime, float ratio) { - super(sleepTime); - this.ratio = ratio; - } - - @Override - public void perform() throws Exception { - LOG.info(String.format("Performing action: Batch restarting %d%% of region servers", - (int)(ratio * 100))); - List selectedServers = selectRandomItems(getCurrentServers(), ratio); - - for (ServerName server : selectedServers) { - LOG.info("Killing region server:" + server); - cluster.killRegionServer(server); - } - - for (ServerName server : selectedServers) { - cluster.waitForRegionServerToStop(server, TIMEOUT); - } - - LOG.info("Killed " + selectedServers.size() + " region servers. Reported num of rs:" - + cluster.getClusterStatus().getServersSize()); - - sleep(sleepTime); - - for (ServerName server : selectedServers) { - LOG.info("Starting region server:" + server.getHostname()); - cluster.startRegionServer(server.getHostname()); - - } - for (ServerName server : selectedServers) { - cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT); - } - LOG.info("Started " + selectedServers.size() +" region servers. Reported num of rs:" - + cluster.getClusterStatus().getServersSize()); - } - } - - /** - * Restarts a ratio of the regionservers in a rolling fashion. At each step, either kills a - * server, or starts one, sleeping randomly (0-sleepTime) in between steps. 
- */ - public static class RollingBatchRestartRs extends BatchRestartRs { - public RollingBatchRestartRs(long sleepTime, float ratio) { - super(sleepTime, ratio); - } - - @Override - public void perform() throws Exception { - LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", - (int)(ratio * 100))); - List selectedServers = selectRandomItems(getCurrentServers(), ratio); - - Queue serversToBeKilled = new LinkedList(selectedServers); - Queue deadServers = new LinkedList(); - - // - while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) { - boolean action = true; //action true = kill server, false = start server - - if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) { - action = deadServers.isEmpty(); - } else { - action = RandomUtils.nextBoolean(); - } - - if (action) { - ServerName server = serversToBeKilled.remove(); - killRs(server); - deadServers.add(server); - } else { - ServerName server = deadServers.remove(); - startRs(server); - } - - sleep(RandomUtils.nextInt((int)sleepTime)); - } - } - } - - public static class UnbalanceRegionsAction extends Action { - private double fractionOfRegions; - private double fractionOfServers; - - /** - * Unbalances the regions on the cluster by choosing "target" servers, and moving - * some regions from each of the non-target servers to random target servers. - * @param fractionOfRegions Fraction of regions to move from each server. - * @param fractionOfServers Fraction of servers to be chosen as targets. 
- */ - public UnbalanceRegionsAction(double fractionOfRegions, double fractionOfServers) { - this.fractionOfRegions = fractionOfRegions; - this.fractionOfServers = fractionOfServers; - } - - @Override - public void perform() throws Exception { - LOG.info("Unbalancing regions"); - ClusterStatus status = this.cluster.getClusterStatus(); - List victimServers = new LinkedList(status.getServers()); - int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size()); - List targetServers = new ArrayList(targetServerCount); - for (int i = 0; i < targetServerCount; ++i) { - int victimIx = RandomUtils.nextInt(victimServers.size()); - targetServers.add(victimServers.remove(victimIx)); - } - unbalanceRegions(status, victimServers, targetServers, fractionOfRegions); - } - } - - public static class ForceBalancerAction extends Action { - @Override - public void perform() throws Exception { - LOG.info("Balancing regions"); - forceBalancer(); - } - } - - public static class AddColumnAction extends ChaosMonkey.Action { - - private byte[] tableName; - private HBaseAdmin admin; - - public AddColumnAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor columnDescriptor = null; - - while(columnDescriptor == null || - tableDescriptor.getFamily(columnDescriptor.getName()) != null) { - columnDescriptor = new HColumnDescriptor(RandomStringUtils.randomAlphabetic(5)); - } - - tableDescriptor.addFamily(columnDescriptor); - admin.modifyTable(tableName, tableDescriptor); - } - } - - public static class RemoveColumnAction extends ChaosMonkey.Action { - private byte[] tableName; - private HBaseAdmin admin; - private Random random; - - public RemoveColumnAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - random = new Random(); - } - - @Override - public void init(ActionContext context) 
throws IOException { - super.init(context); - this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); - - if (columnDescriptors.length <= 1) { - return; - } - - int index = random.nextInt(columnDescriptors.length); - while(columnDescriptors[index].getNameAsString().equals( - Bytes.toString(LoadTestTool.COLUMN_FAMILY))) { - index = random.nextInt(columnDescriptors.length); - } - - tableDescriptor.removeFamily(columnDescriptors[index].getName()); - - admin.modifyTable(tableName, tableDescriptor); - } - } - - public static class ChangeVersionsAction extends ChaosMonkey.Action { - private byte[] tableName; - private HBaseAdmin admin; - private Random random; - - public ChangeVersionsAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - random = new Random(); - } - - @Override - public void init(ActionContext context) throws IOException { - super.init(context); - this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); - - if ( columnDescriptors == null || columnDescriptors.length == 0) { - return; - } - - int versions = random.nextInt(3) + 1; - for(HColumnDescriptor descriptor:columnDescriptors) { - descriptor.setMaxVersions(versions); - descriptor.setMinVersions(versions); - } - - admin.modifyTable(tableName, tableDescriptor); - } - } - - public static class ChangeEncodingAction extends ChaosMonkey.Action { - private byte[] tableName; - private HBaseAdmin admin; - private Random random; - - public ChangeEncodingAction(String tableName) { - this.tableName = Bytes.toBytes(tableName); - random = new 
Random(); - } - - @Override - public void init(ActionContext context) throws IOException { - super.init(context); - this.admin = context.getHaseIntegrationTestingUtility().getHBaseAdmin(); - } - - @Override - public void perform() throws Exception { - HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName); - HColumnDescriptor[] columnDescriptors = tableDescriptor.getColumnFamilies(); - - if (columnDescriptors == null || columnDescriptors.length == 0) { - return; - } - - // possible DataBlockEncoding id's - int[] possibleIds = {0, 2, 3, 4, 6}; - for (HColumnDescriptor descriptor : columnDescriptors) { - short id = (short) possibleIds[random.nextInt(possibleIds.length)]; - descriptor.setDataBlockEncoding(DataBlockEncoding.getEncodingById(id)); - } - - admin.modifyTable(tableName, tableDescriptor); - } - } - - /** - * A context for a Policy - */ - public static class PolicyContext extends ActionContext { - public PolicyContext(IntegrationTestingUtility util) { - super(util); - } - } - - /** - * A policy to introduce chaos to the cluster - */ - public static abstract class Policy extends StoppableImplementation implements Runnable { - protected PolicyContext context; - - public void init(PolicyContext context) throws Exception { - this.context = context; - } - } - - /** A policy that runs multiple other policies one after the other */ - public static class CompositeSequentialPolicy extends Policy { - private List policies; - public CompositeSequentialPolicy(Policy... policies) { - this.policies = Arrays.asList(policies); - } - - @Override - public void stop(String why) { - super.stop(why); - for (Policy p : policies) { - p.stop(why); - } - } - - @Override - public void run() { - for (Policy p : policies) { - p.run(); - } - } - - @Override - public void init(PolicyContext context) throws Exception { - super.init(context); - for (Policy p : policies) { - p.init(context); - } - } - } - - /** A policy which does stuff every time interval. 
*/ - public static abstract class PeriodicPolicy extends Policy { - private long periodMs; - - public PeriodicPolicy(long periodMs) { - this.periodMs = periodMs; - } - - @Override - public void run() { - // Add some jitter. - int jitter = RandomUtils.nextInt((int)periodMs); - LOG.info("Sleeping for " + jitter + " to add jitter"); - Threads.sleep(jitter); - - while (!isStopped()) { - long start = System.currentTimeMillis(); - runOneIteration(); - - if (isStopped()) return; - long sleepTime = periodMs - (System.currentTimeMillis() - start); - if (sleepTime > 0) { - LOG.info("Sleeping for: " + sleepTime); - Threads.sleep(sleepTime); - } - } - } - - protected abstract void runOneIteration(); - - @Override - public void init(PolicyContext context) throws Exception { - super.init(context); - LOG.info("Using ChaosMonkey Policy: " + this.getClass() + ", period: " + periodMs); - } - } - - - /** A policy which performs a sequence of actions deterministically. */ - public static class DoActionsOncePolicy extends PeriodicPolicy { - private List actions; - - public DoActionsOncePolicy(long periodMs, List actions) { - super(periodMs); - this.actions = new ArrayList(actions); - } - - public DoActionsOncePolicy(long periodMs, Action... actions) { - this(periodMs, Arrays.asList(actions)); - } - - @Override - protected void runOneIteration() { - if (actions.isEmpty()) { - this.stop("done"); - return; - } - Action action = actions.remove(0); - - try { - action.perform(); - } catch (Exception ex) { - LOG.warn("Exception occured during performing action: " - + StringUtils.stringifyException(ex)); - } - } - - @Override - public void init(PolicyContext context) throws Exception { - super.init(context); - for (Action action : actions) { - action.init(this.context); - } - } - } - - /** - * A policy, which picks a random action according to the given weights, - * and performs it every configurable period. 
- */ - public static class PeriodicRandomActionPolicy extends PeriodicPolicy { - private List> actions; - - public PeriodicRandomActionPolicy(long periodMs, List> actions) { - super(periodMs); - this.actions = actions; - } - - public PeriodicRandomActionPolicy(long periodMs, Pair... actions) { - // We don't expect it to be modified. - this(periodMs, Arrays.asList(actions)); - } - - public PeriodicRandomActionPolicy(long periodMs, Action... actions) { - super(periodMs); - this.actions = new ArrayList>(actions.length); - for (Action action : actions) { - this.actions.add(new Pair(action, 1)); - } - } - - @Override - protected void runOneIteration() { - Action action = selectWeightedRandomItem(actions); - try { - action.perform(); - } catch (Exception ex) { - LOG.warn("Exception occured during performing action: " - + StringUtils.stringifyException(ex)); - } - } - - @Override - public void init(PolicyContext context) throws Exception { - super.init(context); - for (Pair action : actions) { - action.getFirst().init(this.context); - } - } - } - - /** Selects a random item from the given items */ - static T selectRandomItem(T[] items) { - return items[RandomUtils.nextInt(items.length)]; - } - - /** Selects a random item from the given items with weights*/ - static T selectWeightedRandomItem(List> items) { - int totalWeight = 0; - for (Pair pair : items) { - totalWeight += pair.getSecond(); - } - - int cutoff = RandomUtils.nextInt(totalWeight); - int cummulative = 0; - T item = null; - - //warn: O(n) - for (int i=0; i List selectRandomItems(T[] items, float ratio) { - int remaining = (int)Math.ceil(items.length * ratio); - - List selectedItems = new ArrayList(remaining); - - for (int i=0; i 0; i++) { - if (RandomUtils.nextFloat() < ((float)remaining/(items.length-i))) { - selectedItems.add(items[i]); - remaining--; - } - } - - return selectedItems; - } - - /** - * All actions that deal with RS's with the following weights (relative probabilities): - * - Restart active 
master (sleep 5 sec) : 2 - * - Restart random regionserver (sleep 5 sec) : 2 - * - Restart random regionserver (sleep 60 sec) : 2 - * - Restart META regionserver (sleep 5 sec) : 1 - * - Batch restart of 50% of regionservers (sleep 5 sec) : 2 - * - Rolling restart of 100% of regionservers (sleep 5 sec) : 2 - */ - @SuppressWarnings("unchecked") - private static final List> ALL_ACTIONS = Lists.newArrayList( - new Pair(new RestartActiveMaster(FIVE_SEC), 2), - new Pair(new RestartRandomRs(FIVE_SEC), 2), - new Pair(new RestartRandomRs(ONE_MIN), 2), - new Pair(new RestartRsHoldingMeta(FIVE_SEC), 1), - new Pair(new BatchRestartRs(FIVE_SEC, 0.5f), 2), - new Pair(new RollingBatchRestartRs(FIVE_SEC, 1.0f), 2) - ); - - public static final String EVERY_MINUTE_RANDOM_ACTION_POLICY = "EVERY_MINUTE_RANDOM_ACTION_POLICY"; - - private Policy[] policies; - private Thread[] monkeyThreads; - - public void start() throws Exception { - monkeyThreads = new Thread[policies.length]; - - for (int i=0; i NAMED_POLICIES = Maps.newHashMap(); - static { - NAMED_POLICIES.put(EVERY_MINUTE_RANDOM_ACTION_POLICY, - new PeriodicRandomActionPolicy(ONE_MIN, ALL_ACTIONS)); - } - - @Override - protected void addOptions() { - addOptWithArg("policy", "a named policy defined in ChaosMonkey.java. 
Possible values: " - + NAMED_POLICIES.keySet()); - //we can add more options, and make policies more configurable - } - - @Override - protected void processOptions(CommandLine cmd) { - String[] policies = cmd.getOptionValues("policy"); - if (policies != null) { - setPoliciesByName(policies); - } - } - - @Override - protected int doWork() throws Exception { - start(); - waitForStop(); - return 0; - } - - public static void main(String[] args) throws Exception { - Configuration conf = HBaseConfiguration.create(); - IntegrationTestingUtility.setUseDistributedCluster(conf); - IntegrationTestingUtility util = new IntegrationTestingUtility(conf); - util.initializeCluster(1); - - ChaosMonkey monkey = new ChaosMonkey(util, EVERY_MINUTE_RANDOM_ACTION_POLICY); - int ret = ToolRunner.run(conf, monkey, args); - System.exit(ret); - } -} diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java index 1e6c254..229470f 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java @@ -56,7 +56,7 @@ public class LoadTestTool extends AbstractHBaseTool { protected static final String DEFAULT_TABLE_NAME = "cluster_test"; /** Column family used by the test */ - protected static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf"); + public static byte[] COLUMN_FAMILY = Bytes.toBytes("test_cf"); /** Column families used by the test */ protected static final byte[][] COLUMN_FAMILIES = { COLUMN_FAMILY }; -- 1.7.10.2 (Apple Git-33)