diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java index 2d46279..1a8244a 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/ClusterManager.java @@ -31,32 +31,6 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; */ @InterfaceAudience.Private interface ClusterManager extends Configurable { - /** - * Type of the service daemon - */ - public static enum ServiceType { - HADOOP_NAMENODE("namenode"), - HADOOP_DATANODE("datanode"), - HADOOP_JOBTRACKER("jobtracker"), - HADOOP_TASKTRACKER("tasktracker"), - HBASE_MASTER("master"), - HBASE_REGIONSERVER("regionserver"); - - private String name; - - ServiceType(String name) { - this.name = name; - } - - public String getName() { - return name; - } - - @Override - public String toString() { - return getName(); - } - } /** * Start the service on the given host @@ -76,23 +50,30 @@ interface ClusterManager extends Configurable { /** * Kills the service running on the given host */ - void kill(ServiceType service, String hostname, int port) throws IOException; + void kill(ServiceType service, String hostname, int port, int pid) throws IOException; /** * Suspends the service running on the given host */ - void suspend(ServiceType service, String hostname, int port) throws IOException; + void suspend(ServiceType service, String hostname, int port, int pid) throws IOException; /** * Resumes the services running on the given host */ - void resume(ServiceType service, String hostname, int port) throws IOException; + void resume(ServiceType service, String hostname, int port, int pid) throws IOException; + + /** + * Returns whether the service is running on the remote host. + *

+ * If pid is given, this will check whether the service is running, or else this only checks + * whether there's any thread running with the service type. + */ + boolean isRunning(ServiceType service, String hostname, int pid) throws IOException; /** - * Returns whether the service is running on the remote host. This only checks whether the - * service still has a pid. + * Get pid of the service thread running on the target hostname */ - boolean isRunning(ServiceType service, String hostname, int port) throws IOException; + int getServicePid(ServiceType service, String hostname, int port) throws IOException; /* TODO: further API ideas: * diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java index 6e7cd33..cf67778 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java @@ -25,7 +25,6 @@ import java.util.Set; import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.ClusterManager.ServiceType; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ClusterConnection; @@ -111,11 +110,11 @@ public class DistributedHBaseCluster extends HBaseCluster { } @Override - public void killRegionServer(ServerName serverName) throws IOException { + public void killRegionServer(ServerName serverName, int pid) throws IOException { LOG.info("Aborting RS: " + serverName.getServerName()); clusterManager.kill(ServiceType.HBASE_REGIONSERVER, serverName.getHostname(), - serverName.getPort()); + serverName.getPort(), pid); } @Override @@ -127,17 +126,18 @@ public class DistributedHBaseCluster extends HBaseCluster { } @Override - public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException { - 
waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout); + public void waitForRegionServerToStop(ServerName serverName, long timeout, int pid) + throws IOException { + waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout, pid); } - private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout) - throws IOException { + private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout, + int pid) throws IOException { LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName()); long start = System.currentTimeMillis(); while ((System.currentTimeMillis() - start) < timeout) { - if (!clusterManager.isRunning(service, serverName.getHostname(), serverName.getPort())) { + if (!clusterManager.isRunning(service, serverName.getHostname(), pid)) { return; } Threads.sleep(1000); @@ -158,9 +158,9 @@ public class DistributedHBaseCluster extends HBaseCluster { } @Override - public void killMaster(ServerName serverName) throws IOException { + public void killMaster(ServerName serverName, int pid) throws IOException { LOG.info("Aborting Master: " + serverName.getServerName()); - clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname(), serverName.getPort()); + clusterManager.kill(ServiceType.HBASE_MASTER, serverName.getHostname(), serverName.getPort(), pid); } @Override @@ -170,8 +170,8 @@ public class DistributedHBaseCluster extends HBaseCluster { } @Override - public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException { - waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout); + public void waitForMasterToStop(ServerName serverName, long timeout, int pid) throws IOException { + waitForServiceToStop(ServiceType.HBASE_MASTER, serverName, timeout, pid); } @Override @@ -419,4 +419,10 @@ public class DistributedHBaseCluster extends HBaseCluster { LOG.info("Added new HBaseAdmin"); return true; } + + @Override + public 
int getServicePid(ServiceType serviceType, ServerName serverName) throws IOException { + return clusterManager + .getServicePid(serviceType, serverName.getHostname(), serverName.getPort()); + } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java index 8bdb5d6..15bfbfc 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java @@ -148,17 +148,52 @@ public class HBaseClusterManager extends Configured implements ClusterManager { public abstract String getCommand(ServiceType service, Operation op); - public String isRunningCommand(ServiceType service) { - return findPidCommand(service); + /** + * Compose a command to check whether the given service is running with the given pid, or if pid + * not given, check whether there're any thread running with the service name + * @param service type of the service + * @param pid expecting pid of the service + * @return the command to check running + */ + public String isRunningCommand(ServiceType service, int pid) { + // use "tr" and "cut" to avoid the shell execution returns non-zero value + if (pid > 0) { + return String.format( + "ps aux | grep proc_%s | grep %d | grep -v grep | tr -s ' ' | cut -d ' ' -f2", service, + pid); + } else { + return String.format("ps aux | grep proc_%s | grep -v grep | tr -s ' ' | cut -d ' ' -f2", + service); + } } - protected String findPidCommand(ServiceType service) { - return String.format("ps aux | grep proc_%s | grep -v grep | tr -s ' ' | cut -d ' ' -f2", + /** + * Compose a command to get the pid of the service thread. + *

+ * If port is given, get pid through the port, or else from the service name. + *

+ * Notice that we might get incorrect pid w/o port since there might be more than one thread + * running with the same service name + * @param service type of the service to get pid of + * @param port the port to which the service should be listening to + * @return the command to get pid + */ + protected String findPidCommand(ServiceType service, int port) { + if (port > 0) { + return String.format( + "netstat -nltp | grep %d | tr -s ' ' | cut -d ' ' -f7 | cut -d '/' -f1", port); + } else { + return String.format("ps aux | grep proc_%s | grep -v grep | tr -s ' ' | cut -d ' ' -f2", service); + } } - public String signalCommand(ServiceType service, String signal) { - return String.format("%s | xargs kill -s %s", findPidCommand(service), signal); + public String signalCommand(ServiceType service, String signal, int port, int pid) { + if (pid > 0) { + return String.format("kill -s %s %d", signal, pid); + } else { + return String.format("%s | xargs kill -s %s", findPidCommand(service, port), signal); + } } } @@ -270,29 +305,53 @@ public class HBaseClusterManager extends Configured implements ClusterManager { exec(hostname, service, Operation.RESTART); } - public void signal(ServiceType service, String signal, String hostname) throws IOException { - execWithRetries(hostname, getCommandProvider(service).signalCommand(service, signal)); + public void signal(ServiceType service, String signal, String hostname, int port, int pid) + throws IOException { + execWithRetries(hostname, getCommandProvider(service).signalCommand(service, signal, port, pid)); } @Override - public boolean isRunning(ServiceType service, String hostname, int port) throws IOException { - String ret = execWithRetries(hostname, getCommandProvider(service).isRunningCommand(service)) - .getSecond(); + public boolean isRunning(ServiceType service, String hostname, int pid) throws IOException { + String ret = + execWithRetries(hostname, getCommandProvider(service).isRunningCommand(service, pid)) + 
.getSecond(); return ret.length() > 0; } + /** + * Get pid of the service thread running on the target hostname + *

+ * Notice that we might get incorrect pid w/o port since there might be more than one thread + * running with the same service name + * @param service type of the service + * @param hostname host to check against + * @param port to which port the service thread should be listening + * @return pid of the service thread running on the target host + * @throws IOException if shell execution failed too many times + */ + @Override + public int getServicePid(ServiceType service, String hostname, int port) throws IOException { + String pidStr = + execWithRetries(hostname, getCommandProvider(service).findPidCommand(service, port)) + .getSecond(); + if (pidStr == null || pidStr.trim().isEmpty()) { + return -1; + } + return Integer.parseInt(pidStr.trim().split("\\s+")[0]); + } + @Override - public void kill(ServiceType service, String hostname, int port) throws IOException { - signal(service, SIGKILL, hostname); + public void kill(ServiceType service, String hostname, int port, int pid) throws IOException { + signal(service, SIGKILL, hostname, port, pid); } @Override - public void suspend(ServiceType service, String hostname, int port) throws IOException { - signal(service, SIGSTOP, hostname); + public void suspend(ServiceType service, String hostname, int port, int pid) throws IOException { + signal(service, SIGSTOP, hostname, port, pid); } @Override - public void resume(ServiceType service, String hostname, int port) throws IOException { - signal(service, SIGCONT, hostname); + public void resume(ServiceType service, String hostname, int port, int pid) throws IOException { + signal(service, SIGCONT, hostname, port, pid); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java index 9ea126a..e230874 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/RESTApiClusterManager.java @@ -34,6 +34,7 @@ import 
javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriBuilder; import javax.xml.ws.http.HTTPException; + import java.io.IOException; import java.net.URI; import java.util.HashMap; @@ -135,7 +136,7 @@ public class RESTApiClusterManager extends Configured implements ClusterManager } @Override - public boolean isRunning(ServiceType service, String hostname, int port) throws IOException { + public boolean isRunning(ServiceType service, String hostname, int pid) throws IOException { String serviceName = getServiceName(roleServiceType.get(service)); String hostId = getHostId(hostname); String roleState = getRoleState(serviceName, service.toString(), hostId); @@ -152,18 +153,18 @@ public class RESTApiClusterManager extends Configured implements ClusterManager } @Override - public void kill(ServiceType service, String hostname, int port) throws IOException { - hBaseClusterManager.kill(service, hostname, port); + public void kill(ServiceType service, String hostname, int port, int pid) throws IOException { + hBaseClusterManager.kill(service, hostname, port, pid); } @Override - public void suspend(ServiceType service, String hostname, int port) throws IOException { - hBaseClusterManager.suspend(service, hostname, port); + public void suspend(ServiceType service, String hostname, int port, int pid) throws IOException { + hBaseClusterManager.suspend(service, hostname, port, pid); } @Override - public void resume(ServiceType service, String hostname, int port) throws IOException { - hBaseClusterManager.resume(service, hostname, port); + public void resume(ServiceType service, String hostname, int port, int pid) throws IOException { + hBaseClusterManager.resume(service, hostname, port, pid); } @@ -347,4 +348,9 @@ public class RESTApiClusterManager extends Configured implements ClusterManager private enum Service { HBASE, HDFS, MAPREDUCE } + + @Override + public int getServicePid(ServiceType service, String hostname, int port) throws IOException 
{ + return hBaseClusterManager.getServicePid(service, hostname, port); + } } \ No newline at end of file diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java index a3afccd..e087749 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/Action.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.ServiceType; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.HBaseAdmin; @@ -107,8 +108,9 @@ public class Action { protected void killMaster(ServerName server) throws IOException { LOG.info("Killing master:" + server); - cluster.killMaster(server); - cluster.waitForMasterToStop(server, killMasterTimeout); + int pid = cluster.getServicePid(ServiceType.HBASE_MASTER, server); + cluster.killMaster(server, pid); + cluster.waitForMasterToStop(server, killMasterTimeout, pid); LOG.info("Killed master server:" + server); } @@ -121,8 +123,9 @@ public class Action { protected void killRs(ServerName server) throws IOException { LOG.info("Killing region server:" + server); - cluster.killRegionServer(server); - cluster.waitForRegionServerToStop(server, killRsTimeout); + int pid = cluster.getServicePid(ServiceType.HBASE_REGIONSERVER, server); + cluster.killRegionServer(server, pid); + cluster.waitForRegionServerToStop(server, killRsTimeout, pid); LOG.info("Killed region server:" + server + ". 
Reported num of rs:" + cluster.getClusterStatus().getServersSize()); } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java index b6a5b50..093d1e9 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/BatchRestartRsAction.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.chaos.actions; import java.util.List; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.ServiceType; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; /** @@ -43,11 +44,13 @@ public class BatchRestartRsAction extends RestartActionBaseAction { for (ServerName server : selectedServers) { LOG.info("Killing region server:" + server); - cluster.killRegionServer(server); + int pid = cluster.getServicePid(ServiceType.HBASE_REGIONSERVER, server); + cluster.killRegionServer(server, pid); } for (ServerName server : selectedServers) { - cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT); + int pid = cluster.getServicePid(ServiceType.HBASE_REGIONSERVER, server); + cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT, pid); } LOG.info("Killed " + selectedServers.size() + " region servers. Reported num of rs:" diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java index e6f181b..226de67 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java @@ -124,9 +124,10 @@ public abstract class HBaseCluster implements Closeable, Configurable { /** * Kills the region server process if this is a distributed cluster, otherwise * this causes the region server to exit doing basic clean up only. 
+ * @param pid pid of the region server process to kill, or a non-positive value if unknown * @throws IOException if something goes wrong */ - public abstract void killRegionServer(ServerName serverName) throws IOException; + public abstract void killRegionServer(ServerName serverName, int pid) throws IOException; /** * Stops the given region server, by attempting a gradual stop. @@ -160,7 +161,7 @@ public abstract class HBaseCluster implements Closeable, Configurable { * @return whether the operation finished with success * @throws IOException if something goes wrong or timeout occurs */ - public abstract void waitForRegionServerToStop(ServerName serverName, long timeout) + public abstract void waitForRegionServerToStop(ServerName serverName, long timeout, int pid) throws IOException; /** @@ -177,7 +178,7 @@ public abstract class HBaseCluster implements Closeable, Configurable { * this causes master to exit doing basic clean up only. * @throws IOException if something goes wrong */ - public abstract void killMaster(ServerName serverName) throws IOException; + public abstract void killMaster(ServerName serverName, int pid) throws IOException; /** * Stops the given master, by attempting a gradual stop. @@ -189,7 +190,7 @@ public abstract class HBaseCluster implements Closeable, Configurable { * Wait for the specified master to stop the thread / process. 
* @throws IOException if something goes wrong or timeout occurs */ - public abstract void waitForMasterToStop(ServerName serverName, long timeout) + public abstract void waitForMasterToStop(ServerName serverName, long timeout, int pid) throws IOException; /** @@ -290,4 +291,8 @@ public abstract class HBaseCluster implements Closeable, Configurable { public void waitForDatanodesRegistered(int nbDN) throws Exception { } + + public int getServicePid(ServiceType serviceType, ServerName serverName) throws IOException { + return 0; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java index 38d92d3..2246eef 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java @@ -239,7 +239,7 @@ public class MiniHBaseCluster extends HBaseCluster { } @Override - public void killRegionServer(ServerName serverName) throws IOException { + public void killRegionServer(ServerName serverName, int pid) throws IOException { HRegionServer server = getRegionServer(getRegionServerIndex(serverName)); if (server instanceof MiniHBaseClusterRegionServer) { LOG.info("Killing " + server.toString()); @@ -255,8 +255,9 @@ public class MiniHBaseCluster extends HBaseCluster { } @Override - public void waitForRegionServerToStop(ServerName serverName, long timeout) throws IOException { - //ignore timeout for now + public void waitForRegionServerToStop(ServerName serverName, long timeout, int pid) + throws IOException { + // ignore timeout for now waitOnRegionServer(getRegionServerIndex(serverName)); } @@ -266,7 +267,7 @@ public class MiniHBaseCluster extends HBaseCluster { } @Override - public void killMaster(ServerName serverName) throws IOException { + public void killMaster(ServerName serverName, int pid) throws IOException { abortMaster(getMasterIndex(serverName)); } @@ -276,8 +277,8 @@ 
public class MiniHBaseCluster extends HBaseCluster { } @Override - public void waitForMasterToStop(ServerName serverName, long timeout) throws IOException { - //ignore timeout for now + public void waitForMasterToStop(ServerName serverName, long timeout, int pid) throws IOException { + // ignore timeout for now waitOnMaster(getMasterIndex(serverName)); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ServiceType.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ServiceType.java new file mode 100644 index 0000000..f39ba87 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ServiceType.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase; + +/** + * Type of the service daemon + */ +public enum ServiceType { + HADOOP_NAMENODE("namenode"), + HADOOP_DATANODE("datanode"), + HADOOP_JOBTRACKER("jobtracker"), + HADOOP_TASKTRACKER("tasktracker"), + HBASE_MASTER("master"), + HBASE_REGIONSERVER("regionserver"); + + private String name; + + ServiceType(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + @Override + public String toString() { + return getName(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java index 0e5bd9c..f9dae59 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java @@ -191,10 +191,10 @@ public class TestMetaWithReplicas { // kill the master so that regionserver recovery is not triggered at all // for the meta server util.getHBaseClusterInterface().stopMaster(master); - util.getHBaseClusterInterface().waitForMasterToStop(master, 60000); + util.getHBaseClusterInterface().waitForMasterToStop(master, 60000, -1); if (!master.equals(primary)) { - util.getHBaseClusterInterface().killRegionServer(primary); - util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000); + util.getHBaseClusterInterface().killRegionServer(primary, -1); + util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000, -1); } ((ClusterConnection)c).clearRegionCache(); htable.close(); @@ -253,7 +253,7 @@ public class TestMetaWithReplicas { throws Exception { ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster(); TEST_UTIL.getHBaseClusterInterface().stopMaster(sn); - TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000); + TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000, -1); List metaZnodes 
= TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes(); assert(metaZnodes.size() == originalReplicaCount); //we should have what was configured before TEST_UTIL.getHBaseClusterInterface().getConf().setInt(HConstants.META_REPLICAS_NUM, @@ -415,7 +415,7 @@ public class TestMetaWithReplicas { locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true); HRegionLocation hrl = rl.getRegionLocation(1); ServerName oldServer = hrl.getServerName(); - TEST_UTIL.getHBaseClusterInterface().killRegionServer(oldServer); + TEST_UTIL.getHBaseClusterInterface().killRegionServer(oldServer, -1); int i = 0; do { LOG.debug("Waiting for the replica " + hrl.getRegionInfo() + " to come up"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java index c710d3c..20ed529 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java @@ -618,7 +618,7 @@ public class TestRegionObserverInterface { tableName, new Integer[] {0, 0, 1, 1, 0, 0}); - cluster.killRegionServer(rs1.getRegionServer().getServerName()); + cluster.killRegionServer(rs1.getRegionServer().getServerName(), -1); Threads.sleep(1000); // Let the kill soak in. util.waitUntilAllRegionsAssigned(tableName); LOG.info("All regions assigned"); @@ -670,7 +670,7 @@ public class TestRegionObserverInterface { tableName, new Integer[] {0, 0, 1, 1, 0, 0}); - cluster.killRegionServer(rs1.getRegionServer().getServerName()); + cluster.killRegionServer(rs1.getRegionServer().getServerName(), -1); Threads.sleep(1000); // Let the kill soak in. 
util.waitUntilAllRegionsAssigned(tableName); LOG.info("All regions assigned"); @@ -709,7 +709,7 @@ public class TestRegionObserverInterface { put.add(C, C, C); table.put(put); - cluster.killRegionServer(rs1.getRegionServer().getServerName()); + cluster.killRegionServer(rs1.getRegionServer().getServerName(), -1); Threads.sleep(20000); // just to be sure that the kill has fully started. util.waitUntilAllRegionsAssigned(tableName); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 51861d6..f4c482a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -137,9 +137,9 @@ public class TestAssignmentManagerOnCluster { metaState.getServerName(), master.getServerName()); assertEquals("Meta should be on the meta server", metaState.getServerName(), metaServerName); - cluster.killRegionServer(metaServerName); + cluster.killRegionServer(metaServerName, -1); stoppedARegionServer = true; - cluster.waitForRegionServerToStop(metaServerName, 60000); + cluster.waitForRegionServerToStop(metaServerName, 60000, -1); // Wait for SSH to finish final ServerName oldServerName = metaServerName; @@ -800,8 +800,8 @@ public class TestAssignmentManagerOnCluster { ServerName oldServerName = state.getServerName(); if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) { // Kill the hosting server, which doesn't have meta on it. 
- cluster.killRegionServer(oldServerName); - cluster.waitForRegionServerToStop(oldServerName, -1); + cluster.killRegionServer(oldServerName, -1); + cluster.waitForRegionServerToStop(oldServerName, -1, -1); break; } int i = cluster.getServerWithMeta(); @@ -863,9 +863,9 @@ public class TestAssignmentManagerOnCluster { final ServerName serverName = rs.getServerName(); // Wait till SSH tried to assign regions a several times int counter = MyLoadBalancer.counter.get() + 5; - cluster.killRegionServer(serverName); + cluster.killRegionServer(serverName, -1); startAServer = true; - cluster.waitForRegionServerToStop(serverName, -1); + cluster.waitForRegionServerToStop(serverName, -1, -1); while (counter > MyLoadBalancer.counter.get()) { Thread.sleep(1000); } @@ -983,8 +983,8 @@ public class TestAssignmentManagerOnCluster { assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); // Kill the hosting server, which doesn't have meta on it. - cluster.killRegionServer(oldServerName); - cluster.waitForRegionServerToStop(oldServerName, -1); + cluster.killRegionServer(oldServerName, -1); + cluster.waitForRegionServerToStop(oldServerName, -1, -1); ServerManager serverManager = master.getServerManager(); while (!serverManager.isServerDead(oldServerName) @@ -1059,8 +1059,8 @@ public class TestAssignmentManagerOnCluster { master.disableTable(hri.getTable(), HConstants.NO_NONCE, HConstants.NO_NONCE); // Kill the hosting server, which doesn't have meta on it. 
- cluster.killRegionServer(oldServerName); - cluster.waitForRegionServerToStop(oldServerName, -1); + cluster.killRegionServer(oldServerName, -1); + cluster.waitForRegionServerToStop(oldServerName, -1, -1); ServerManager serverManager = master.getServerManager(); while (!serverManager.isServerDead(oldServerName) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index 614f6fb..9b95229 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -353,7 +353,7 @@ public class TestMasterFailover { // Now kill master, meta should remain on rs, where we placed it before. log("Aborting master"); activeMaster.abort("test-kill"); - cluster.waitForMasterToStop(activeMaster.getServerName(), 30000); + cluster.waitForMasterToStop(activeMaster.getServerName(), 30000, -1); log("Master has aborted"); // meta should remain where it was @@ -391,7 +391,7 @@ public class TestMasterFailover { log("Aborting master"); activeMaster.abort("test-kill"); - cluster.waitForMasterToStop(activeMaster.getServerName(), 30000); + cluster.waitForMasterToStop(activeMaster.getServerName(), 30000, -1); log("Master has aborted"); // Start up a new master @@ -420,7 +420,7 @@ public class TestMasterFailover { log("Aborting master"); activeMaster.abort("test-kill"); - cluster.waitForMasterToStop(activeMaster.getServerName(), 30000); + cluster.waitForMasterToStop(activeMaster.getServerName(), 30000, -1); log("Master has aborted"); rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest( diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java index 67e1801..a974c33 100644 --- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterOperationsForRegionReplicas.java @@ -154,7 +154,7 @@ public class TestMasterOperationsForRegionReplicas { // Now kill the master, restart it and see if the assignments are kept ServerName master = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster(); TEST_UTIL.getHBaseClusterInterface().stopMaster(master); - TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(master, 30000); + TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(master, 30000, -1); TEST_UTIL.getHBaseClusterInterface().startMaster(master.getHostname(), master.getPort()); TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(); for (int i = 0; i < numRegions; i++) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java index 69c5f89..e13bac2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java @@ -219,7 +219,7 @@ public class TestRegionPlacement { } int orig = TEST_UTIL.getHBaseCluster().getMaster().assignmentManager.getNumRegionsOpened(); TEST_UTIL.getHBaseCluster().stopRegionServer(serverToKill); - TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(serverToKill, 60000); + TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(serverToKill, 60000, -1); int curr = TEST_UTIL.getHBaseCluster().getMaster().assignmentManager.getNumRegionsOpened(); while (curr - orig < regionsToVerify.size()) { LOG.debug("Waiting for " + regionsToVerify.size() + " to come online " + diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestEnableTableHandler.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestEnableTableHandler.java index f5c8b90..b4dbdbf 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestEnableTableHandler.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestEnableTableHandler.java @@ -100,7 +100,7 @@ public class TestEnableTableHandler { // now stop region servers JVMClusterUtil.RegionServerThread rs = cluster.getRegionServerThreads().get(0); rs.getRegionServer().stop("stop"); - cluster.waitForRegionServerToStop(rs.getRegionServer().getServerName(), 10000); + cluster.waitForRegionServerToStop(rs.getRegionServer().getServerName(), 10000, -1); LOG.debug("Now enabling table " + tableName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java index c8d3a62..1b84e03 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterFailoverWithProcedures.java @@ -468,7 +468,7 @@ public class TestMasterFailoverWithProcedures { // Kill the master HMaster oldMaster = cluster.getMaster(); - cluster.killMaster(cluster.getMaster().getServerName()); + cluster.killMaster(cluster.getMaster().getServerName(), -1); // Wait the secondary waitBackupMaster(testUtil, oldMaster); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java index 97512ce..62e9ec7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java @@ -99,7 +99,7 @@ 
public class TestServerCrashProcedure { // Kill a server. Master will notice but do nothing other than add it to list of dead servers. HRegionServer hrs = this.util.getHBaseCluster().getRegionServer(0); boolean carryingMeta = master.getAssignmentManager().isCarryingMeta(hrs.getServerName()); - this.util.getHBaseCluster().killRegionServer(hrs.getServerName()); + this.util.getHBaseCluster().killRegionServer(hrs.getServerName(), -1); hrs.join(); // Wait until the expiration of the server has arrived at the master. We won't process it // by queuing a ServerCrashProcedure because we have disabled crash processing... but wait