Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenInitializing.java (revision 0)
@@ -0,0 +1,135 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.LargeTests;
+import org.apache.hadoop.hbase.LocalHBaseCluster;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Writable;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Tests region server termination during startup.
+ */
+@Category(LargeTests.class)
+public class TestRSKilledWhenInitializing {
+  private static boolean masterActive = false;
+
+  private static AtomicBoolean firstRS = new AtomicBoolean(true);
+
+  /**
+   * Verifies that a region server is removed from the master's online servers list if it goes
+   * down after registering with the master.
+   * @throws Exception
+   */
+  @Test(timeout = 180000)
+  public void testRSTerminationAfterRegisteringToMasterBeforeCreatingEphemeralNode() throws Exception {
+
+    final int NUM_MASTERS = 1;
+    final int NUM_RS = 2;
+    firstRS.set(true);
+    // Create config to use for this cluster
+    Configuration conf = HBaseConfiguration.create();
+
+    // Start the cluster
+    final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
+    TEST_UTIL.startMiniDFSCluster(3);
+    TEST_UTIL.startMiniZKCluster();
+    TEST_UTIL.createRootDir();
+    final LocalHBaseCluster cluster =
+        new LocalHBaseCluster(conf, NUM_MASTERS, NUM_RS, HMaster.class, MockedRegionServer.class);
+    final MasterThread master = cluster.getMasters().get(0);
+    master.start();
+    try {
+      long startTime = System.currentTimeMillis();
+      while (!master.getMaster().isActiveMaster()) {
+        try {
+          Thread.sleep(100);
+        } catch (InterruptedException ignored) {
+        }
+        if (System.currentTimeMillis() > startTime + 30000) {
+          throw new RuntimeException("Master not active after 30 seconds");
+        }
+      }
+      masterActive = true;
+      cluster.getRegionServers().get(0).start();
+      cluster.getRegionServers().get(1).start();
+      Thread.sleep(10000);
+      List<ServerName> onlineServersList =
+          master.getMaster().getServerManager().getOnlineServersList();
+      while (onlineServersList.size() != 1) {
+        Thread.sleep(100);
+        onlineServersList = master.getMaster().getServerManager().getOnlineServersList();
+      }
+      assertEquals(1, onlineServersList.size());
+    } finally {
+      masterActive = false;
+      firstRS.set(true);
+      TEST_UTIL.shutdownMiniZKCluster();
+      TEST_UTIL.cleanupTestDir();
+      TEST_UTIL.shutdownMiniDFSCluster();
+    }
+  }
+
+  public static class MockedRegionServer extends MiniHBaseCluster.MiniHBaseClusterRegionServer {
+
+    public MockedRegionServer(Configuration conf) throws IOException, InterruptedException {
+      super(conf);
+    }
+
+    @Override
+    protected void handleReportForDutyResponse(final MapWritable c) throws IOException {
+      if (firstRS.getAndSet(false)) {
+        for (Map.Entry<Writable, Writable> e : c.entrySet()) {
+          String key = e.getKey().toString();
+          // The hostname the master sees us as.
+          if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) {
+            String hostnameFromMasterPOV = e.getValue().toString();
+            assertEquals(super.getRpcServer().getListenerAddress().getHostName(),
+                hostnameFromMasterPOV);
+          }
+        }
+        while (!masterActive) {
+          Threads.sleep(100);
+        }
+        super.kill();
+      } else {
+        super.handleReportForDutyResponse(c);
+      }
+    }
+  }
+}
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1536557)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -466,6 +466,8 @@
     this.rpcServer.setQosFunction(new QosFunction());
     this.startcode = System.currentTimeMillis();
 
+    conf.set("hbase.regionserver.rpc.client.socket.bind.address", this.isa.getHostName());
+
     // login the zookeeper client principal (if using security)
     ZKUtil.loginClient(this.conf, "hbase.zookeeper.client.keytab.file",
         "hbase.zookeeper.client.kerberos.principal", this.isa.getHostName());
@@ -738,6 +740,9 @@
     }
 
     try {
+      // Set our ephemeral znode up in zookeeper now we have a name.
+      createMyEphemeralNode();
+
       // Try and register with the Master; tell it we are here.  Break if
       // server is stopped or the clusterup flag is down or hdfs went wacky.
       while (keepLooping()) {
@@ -1080,8 +1085,6 @@
       this.conf.set("mapred.task.id", "hb_rs_" +
         this.serverNameFromMasterPOV.toString());
     }
-    // Set our ephemeral znode up in zookeeper now we have a name.
-    createMyEphemeralNode();
 
     // Master sent us hbase.rootdir to use. Should be fully qualified
     // path with file system specification included.  Set 'fs.defaultFS'
Index: src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java (revision 1536557)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java (working copy)
@@ -87,6 +87,7 @@
   protected final boolean tcpKeepAlive; // if T then use keepalives
   protected int pingInterval; // how often sends ping to the server in msecs
   protected int socketTimeout; // socket timeout
+  protected final InetSocketAddress bindAddress; // local address to bind the client socket to
   protected FailedServers failedServers;
   protected final SocketFactory socketFactory; // how to create sockets
@@ -386,6 +387,7 @@
         this.socket = socketFactory.createSocket();
         this.socket.setTcpNoDelay(tcpNoDelay);
         this.socket.setKeepAlive(tcpKeepAlive);
+        if (bindAddress != null) this.socket.bind(bindAddress);
         // connection time out is 20s
         NetUtils.connect(this.socket, remoteId.getAddress(), getSocketTimeout(conf));
@@ -870,6 +872,12 @@
     this.clusterId = conf.get(HConstants.CLUSTER_ID, "default");
     this.connections = new PoolMap<ConnectionId, Connection>(
         getPoolType(conf), getPoolSize(conf));
+    String hostName = this.conf.get("hbase.regionserver.rpc.client.socket.bind.address");
+    if (hostName != null) {
+      this.bindAddress = new InetSocketAddress(hostName, 0);
+    } else {
+      this.bindAddress = null;
+    }
     this.failedServers = new FailedServers(conf);
   }
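
Note on the HBaseClient change: it binds the client-side RPC socket to a configured local address before connecting, so outbound calls to the master originate from the hostname the region server registered with. The sketch below is a minimal standalone illustration of that bind-before-connect pattern using plain java.net.Socket; the class name, host names, and port are placeholders for illustration, not values taken from this patch.

import java.net.InetSocketAddress;
import java.net.Socket;

public class BindBeforeConnectSketch {
  public static void main(String[] args) throws Exception {
    // Local address to originate from; port 0 lets the OS pick an ephemeral port.
    // "rs-host.example.com" and "master-host.example.com" are placeholder names.
    InetSocketAddress bindAddress = new InetSocketAddress("rs-host.example.com", 0);
    InetSocketAddress remote = new InetSocketAddress("master-host.example.com", 60000);

    Socket socket = new Socket();
    socket.setTcpNoDelay(true);
    socket.setKeepAlive(true);
    // Bind before connect, mirroring the order of the calls added in the patch.
    socket.bind(bindAddress);
    socket.connect(remote, 20000); // 20 second connect timeout
    try {
      System.out.println("Connected from " + socket.getLocalSocketAddress());
    } finally {
      socket.close();
    }
  }
}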