Index: src/test/java/org/apache/hadoop/hbase/master/TestExcludeList.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestExcludeList.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/TestExcludeList.java (revision 0) @@ -0,0 +1,103 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; +import java.io.FileWriter; +import java.io.IOException; +import java.io.BufferedWriter; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; +import org.junit.Test; + +public class TestExcludeList { + private static final Log LOG = LogFactory.getLog(TestExcludeList.class); + + /** + * Simple test of exclude file list. + *

+ * This test starts a region server, then adds it to the excludes file, + * asks the master to refresh it and verifies that the region server has + * actually been kicked out. + * + * @throws Exception + */ + @Test (timeout=180000) + public void testSimpleExclude() throws Exception { + + final int NUM_MASTERS = 1; + final int NUM_RS = 1; + + // Create config to use for this cluster + Configuration conf = HBaseConfiguration.create(); + String exFilePath = HBaseTestingUtility.getTestDir().toString()+Path.SEPARATOR+"excludes"; + + // point the master at the excludes file this test will write + conf.set("hbase.hosts.exclude", exFilePath); + + // Start the cluster + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); + TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + + // start a new region server + JVMClusterUtil.RegionServerThread rservThread = cluster.startRegionServer(); + + HServerInfo info = rservThread.getRegionServer().getHServerInfo(); + + // make sure the new server is up and running + assertTrue(cluster.getMaster().getServerManager().getOnlineServersList().contains(info)); + + // write the info of this region server to the excludes list + try { + BufferedWriter out = new BufferedWriter(new FileWriter(exFilePath)); + out.write(info.getHostnamePort()); + out.close(); + } catch (IOException e) { + } + + // Ask master to refresh its exclude list. This should expire the server + cluster.getMaster().refreshNodes(); + + // verify the region server is indeed offline. 
+ assertTrue(!cluster.getMaster().getServerManager().getOnlineServersList().contains(info)); + + } +} Index: src/main/java/org/apache/hadoop/hbase/NodeExcludedException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/NodeExcludedException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/NodeExcludedException.java (revision 0) @@ -0,0 +1,33 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; + +/** + * This exception is thrown when a region server that is not there in the + * include list or is there in the exclude list tries to report for duty. 
+ * + */ +public class NodeExcludedException extends IOException { + public NodeExcludedException(String message) { + super(message); + } +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 13899) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -117,6 +117,7 @@ import org.apache.hadoop.hbase.regionserver.wal.WALObserver; import org.apache.hadoop.hbase.replication.regionserver.Replication; import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.NodeExcludedException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CompressionTest; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -618,7 +619,7 @@ if (e instanceof IOException) { e = RemoteExceptionHandler.checkIOException((IOException) e); } - if (e instanceof YouAreDeadException) { + if (e instanceof YouAreDeadException || e instanceof NodeExcludedException) { // This will be caught and handled as a fatal error below throw e; } @@ -702,7 +703,7 @@ this.leases.close(); HConnectionManager.deleteConnection(conf, true); this.zooKeeper.close(); - if (!killed) { + if (!killed && !abortRequested) { join(); } LOG.info(Thread.currentThread().getName() + " exiting"); @@ -759,7 +760,7 @@ if (ioe instanceof RemoteException) { ioe = ((RemoteException)ioe).unwrapRemoteException(); } - if (ioe instanceof YouAreDeadException) { + if (ioe instanceof YouAreDeadException || ioe instanceof NodeExcludedException) { // This will be caught and handled as a fatal error in run() throw ioe; } @@ -1566,7 +1567,14 @@ ioe); // Re-throw IOE will cause RS to abort throw ioe; - } else { + } + if (ioe instanceof NodeExcludedException) { + LOG.fatal("Master rejected because this node has been put in the exclusion list", + ioe); + // Re-throw IOE will cause RS to 
abort + throw ioe; + } + else { LOG.warn("remote error telling master we are up", e); } } catch (IOException e) { Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 13899) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -37,6 +38,7 @@ import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; +import org.apache.hadoop.hbase.NodeExcludedException; import org.apache.hadoop.hbase.PleaseHoldException; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.YouAreDeadException; @@ -49,6 +51,7 @@ import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; +import org.apache.hadoop.util.HostsFileReader; /** * The ServerManager class manages info about region servers - HServerInfo, @@ -91,19 +94,28 @@ private final long maxSkew; + private final String HOSTS_PROP_NAME = "hbase.hosts"; + private final String EXCLUDED_HOSTS_PROP_NAME = "hbase.hosts.exclude"; + + private HostsFileReader hostsReader; + /** * Constructor. 
* @param master + * @param services + * @param metrics + * @throws IOException */ public ServerManager(final Server master, final MasterServices services, - MasterMetrics metrics) { + MasterMetrics metrics) throws IOException { this.master = master; this.services = services; this.metrics = metrics; Configuration c = master.getConfiguration(); maxSkew = c.getLong("hbase.master.maxclockskew", 30000); + this.hostsReader = new HostsFileReader( + c.get(HOSTS_PROP_NAME,""), + c.get(EXCLUDED_HOSTS_PROP_NAME,"")); this.deadservers = new DeadServer(c.getInt("hbase.master.maxdeadservers", 100)); } @@ -123,14 +135,62 @@ // is, reject the server and trigger its expiration. The next time it comes // in, it should have been removed from serverAddressToServerInfo and queued // for processing by ProcessServerShutdown. - HServerInfo info = new HServerInfo(serverInfo); + HServerInfo info = new HServerInfo(serverInfo); + if (!checkHostsLists(info)) { + throw new NodeExcludedException("This node is not allowed to join the hbase cluster"); + } checkIsDead(info.getServerName(), "STARTUP"); checkAlreadySameHostPort(info); checkClockSkew(info, serverCurrentTime); recordNewServer(info, false, null); } + /** + * Checks whether a region server is allowed to connect to the master, based on the hosts include and exclude lists. + * @param info The info of the server to check. + */ + public boolean checkHostsLists(HServerInfo info) { + Set hostsList = hostsReader.getHosts(); + boolean inHostsList = + (hostsList.isEmpty() || + (hostsList.contains(info.getHostname())) || + (hostsList.contains(info.getHostnamePort()))); + + Set excludeList = hostsReader.getExcludedHosts(); + boolean inExcludedHostsList = + (excludeList.contains(info.getHostname()) || + excludeList.contains(info.getHostnamePort())); + + return (inHostsList && !inExcludedHostsList); + } + /** + * Rereads the config to get hosts and exclude list file names. + * Rereads the files to update the hosts and exclude lists. 
It + * checks if any of the hosts have changed states: + * 1. Added to hosts --> no further work needed here. + * 2. Removed from hosts --> expire the server and failover the regions. + * 3. Added to exclude --> expire the server and failover the regions. + * 4. Removed from exclude --> Allow the region server to come back in again. + */ + public void refreshNodes() throws IOException { + // Reread the config to get hbase.hosts and hbase.hosts.exclude filenames. + // Update the file names and refresh internal includes and excludes list + + Configuration conf = this.master.getConfiguration(); + hostsReader.updateFileNames(conf.get(HOSTS_PROP_NAME,""), + conf.get(EXCLUDED_HOSTS_PROP_NAME, "")); + hostsReader.refresh(); + for (HServerInfo serverInfo :this.onlineServers.values()) { + // Check if the server is allowed. + if (!checkHostsLists(serverInfo)) { + // this server has been disallowed - kick it out. + expireServer(serverInfo); + } + } + } + + /** * Test to see if we have a server of same host and port already. * @param serverInfo * @throws PleaseHoldException @@ -243,6 +303,12 @@ // Be careful. This method does returns in the middle. HServerInfo info = new HServerInfo(serverInfo); + // If the region server is not allowed, throw an exception and the + // region server will shut itself down + if (!checkHostsLists(info)) { + throw new NodeExcludedException("This node is not allowed to join the hbase cluster"); + } + // Check if dead. If it is, it'll get a 'You Are Dead!' exception. 
checkIsDead(info.getServerName(), "REPORT"); Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 13899) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -1048,6 +1048,17 @@ return ProtocolSignature.getProtocolSigature( this, protocol, clientVersion, clientMethodsHash); } + + /** + * Refresh the nodes list files. This function refreshes both the includes + * and the excludes files. + * @throws IOException + */ + @Override + public void refreshNodes() + throws IOException { + this.getServerManager().refreshNodes(); + } /** * Utility for constructing an instance of the passed HMaster class. Index: src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (revision 13899) +++ src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (working copy) @@ -187,4 +187,11 @@ * @return Previous balancer value */ public boolean balanceSwitch(final boolean b); + + /** + * Refresh the nodes list files. This function refreshes both the includes + * and the excludes files. + * @throws IOException + */ + public void refreshNodes() throws IOException; } \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 13899) +++ src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy) @@ -991,6 +991,16 @@ } /** + * Refresh the nodes list files. This function refreshes both the includes + * and the excludes files. + * @throws IOException + */ + public void refreshNodes() throws IOException + { + getMaster().refreshNodes(); + } + + /** * Turn the load balancer on or off. 
* @param b If true, enable balancer. If false, disable balancer. * @return Previous balancer value Index: src/main/ruby/hbase/admin.rb =================================================================== --- src/main/ruby/hbase/admin.rb (revision 13899) +++ src/main/ruby/hbase/admin.rb (working copy) @@ -193,6 +193,12 @@ end #---------------------------------------------------------------------------------------------- + # Refresh the nodes files + def refreshNodes() + @admin.refreshNodes() + end + + #---------------------------------------------------------------------------------------------- # Returns table's structure description def describe(table_name) tables = @admin.listTables.to_a Index: src/main/ruby/shell/commands/refresh_nodes.rb =================================================================== --- src/main/ruby/shell/commands/refresh_nodes.rb (revision 0) +++ src/main/ruby/shell/commands/refresh_nodes.rb (revision 0) @@ -0,0 +1,40 @@ +# +# Copyright 2010 The Apache Software Foundation +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class RefreshNodes < Command + def help + return <<-EOF +Refresh the nodes list files. 
This function refreshes both the includes +and the excludes files. Use with caution. For expert use only. Examples: + + hbase> refresh_nodes +EOF + end + + def command() + format_simple_command do + admin.refreshNodes() + end + end + end + end +end Index: src/main/ruby/shell.rb =================================================================== --- src/main/ruby/shell.rb (revision 13899) +++ src/main/ruby/shell.rb (working copy) @@ -256,6 +256,7 @@ flush major_compact move + refresh_nodes split unassign zk_dump Index: bin/hbase-daemon.sh =================================================================== --- bin/hbase-daemon.sh (revision 13899) +++ bin/hbase-daemon.sh (working copy) @@ -145,7 +145,7 @@ echo "ulimit -n `ulimit -n`" >> $loglog 2>&1 nohup nice -n $HBASE_NICENESS "$HBASE_HOME"/bin/hbase \ --config "${HBASE_CONF_DIR}" \ - $command $startStop "$@" > "$logout" 2>&1 < /dev/null & + $command "$@" $startStop > "$logout" 2>&1 < /dev/null & echo $! > $pid sleep 1; head "$logout" ;;