Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 993550) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy) @@ -351,7 +351,7 @@ */ public void close() { try { - if(zooKeeper != null) { + if (zooKeeper != null) { zooKeeper.close(); // super.close(); } Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 993550) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -87,6 +87,7 @@ import org.apache.hadoop.hbase.client.MultiResponse; import org.apache.hadoop.hbase.client.Row; import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.MultiPut; import org.apache.hadoop.hbase.client.MultiPutResponse; import org.apache.hadoop.hbase.client.Put; @@ -508,9 +509,9 @@ HBaseRPC.stopProxy(this.hbaseMaster); this.hbaseMaster = null; } - + this.leases.close(); + HConnectionManager.deleteConnection(conf, true); this.zooKeeper.close(); - if (!killed) { join(); } @@ -707,6 +708,9 @@ // Init in here rather than in constructor after thread name has been set this.metrics = new RegionServerMetrics(); startServiceThreads(); + LOG.info("Serving as " + this.serverInfo.getServerName() + + ", sessionid=0x" + + Long.toHexString(this.zooKeeper.getZooKeeper().getSessionId())); isOnline = true; } catch (Throwable e) { this.isOnline = false; Index: src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java (revision 993550) +++ 
src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java (working copy) @@ -126,7 +126,11 @@ public JVMClusterUtil.RegionServerThread addRegionServer(final int index) throws IOException { - JVMClusterUtil.RegionServerThread rst = JVMClusterUtil.createRegionServerThread(this.conf, + // Create each regionserver with its own Configuration instance so each has + // its own HConnection instance rather than sharing one (see HBASE_INSTANCES + // down in the guts of HConnectionManager). + JVMClusterUtil.RegionServerThread rst = + JVMClusterUtil.createRegionServerThread(new Configuration(this.conf), this.regionServerClass, index); this.regionThreads.add(rst); return rst; @@ -254,4 +258,4 @@ admin.createTable(htd); cluster.shutdown(); } -} +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 993550) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -62,6 +62,7 @@ import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ServerConnection; @@ -265,7 +266,8 @@ this.clusterStatusTracker.start(); LOG.info("Server active/primary master; " + this.address + - "; clusterStarter=" + this.clusterStarter); + "; clusterStarter=" + this.clusterStarter + ", sessionid=0x" + + Long.toHexString(this.zooKeeper.getZooKeeper().getSessionId())); } /** @@ -319,8 +321,9 @@ this.rpcServer.stop(); if (this.balancerChore != null) this.balancerChore.interrupt(); this.activeMasterManager.stop(); + this.executorService.shutdown(); + HConnectionManager.deleteConnection(this.conf, true); this.zooKeeper.close(); - 
this.executorService.shutdown(); LOG.info("HMaster main thread exiting"); } Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 993550) +++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -22,14 +22,13 @@ import java.io.IOException; import java.lang.reflect.UndeclaredThrowableException; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; -import java.util.Map.Entry; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArraySet; @@ -53,7 +52,6 @@ import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterAddressTracker; import org.apache.hadoop.hbase.MasterNotRunningException; -import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; @@ -63,11 +61,12 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.SoftValueSortedMap; import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.hbase.zookeeper.RootRegionTracker; import org.apache.hadoop.hbase.zookeeper.ZKTableDisable; import org.apache.hadoop.hbase.zookeeper.ZKUtil; -import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.ipc.RemoteException; import org.apache.zookeeper.KeeperException; @@ -93,11 +92,11 @@ 
// A LRU Map of Configuration hashcode -> TableServers. We set instances to 31. // The zk default max connections to the ensemble from the one client is 30 so // should run into zk issues before hit this value of 31. - private static final Map HBASE_INSTANCES = - new LinkedHashMap + private static final Map HBASE_INSTANCES = + new LinkedHashMap ((int) (MAX_CACHED_HBASE_INSTANCES/0.75F)+1, 0.75F, true) { @Override - protected boolean removeEldestEntry(Map.Entry eldest) { + protected boolean removeEldestEntry(Map.Entry eldest) { return size() > MAX_CACHED_HBASE_INSTANCES; } }; @@ -119,11 +118,11 @@ */ public static HConnection getConnection(Configuration conf) throws ZooKeeperConnectionException { - TableServers connection; + HConnectionImplementation connection; synchronized (HBASE_INSTANCES) { connection = HBASE_INSTANCES.get(conf); if (connection == null) { - connection = new TableServers(conf); + connection = new HConnectionImplementation(conf); HBASE_INSTANCES.put(conf, connection); } } @@ -137,7 +136,7 @@ */ public static void deleteConnection(Configuration conf, boolean stopProxy) { synchronized (HBASE_INSTANCES) { - TableServers t = HBASE_INSTANCES.remove(conf); + HConnectionImplementation t = HBASE_INSTANCES.remove(conf); if (t != null) { t.close(stopProxy); } @@ -151,7 +150,7 @@ */ public static void deleteAllConnections(boolean stopProxy) { synchronized (HBASE_INSTANCES) { - for (TableServers t : HBASE_INSTANCES.values()) { + for (HConnectionImplementation t : HBASE_INSTANCES.values()) { if (t != null) { t.close(stopProxy); } @@ -168,7 +167,7 @@ static int getCachedRegionCount(Configuration conf, byte[] tableName) throws ZooKeeperConnectionException { - TableServers connection = (TableServers)getConnection(conf); + HConnectionImplementation connection = (HConnectionImplementation)getConnection(conf); return connection.getNumberOfCachedRegionLocations(tableName); } @@ -180,13 +179,13 @@ */ static boolean isRegionCached(Configuration conf, byte[] tableName, 
byte[] row) throws ZooKeeperConnectionException { - TableServers connection = (TableServers)getConnection(conf); + HConnectionImplementation connection = (HConnectionImplementation)getConnection(conf); return connection.isRegionCached(tableName, row); } /* Encapsulates connection to zookeeper and regionservers.*/ - static class TableServers implements ServerConnection, Abortable { - static final Log LOG = LogFactory.getLog(TableServers.class); + static class HConnectionImplementation implements HConnection, Abortable { + static final Log LOG = LogFactory.getLog(HConnectionImplementation.class); private final Class serverInterfaceClass; private final long pause; private final int numRetries; @@ -203,7 +202,6 @@ // ZooKeeper-based master address tracker private MasterAddressTracker masterAddressTracker; - private final Object rootRegionLock = new Object(); private final Object metaRegionLock = new Object(); private final Object userRegionLock = new Object(); @@ -213,8 +211,7 @@ private final Map servers = new ConcurrentHashMap(); - // Used by master and region servers during safe mode only - private volatile HRegionLocation rootRegionLocation; + private final RootRegionTracker rootRegionTracker; private final Map> cachedRegionLocations = @@ -230,7 +227,7 @@ * @param conf Configuration object */ @SuppressWarnings("unchecked") - public TableServers(Configuration conf) + public HConnectionImplementation(Configuration conf) throws ZooKeeperConnectionException { this.conf = conf; @@ -265,6 +262,9 @@ zooKeeper.registerListener(masterAddressTracker); masterAddressTracker.start(); + this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this); + this.rootRegionTracker.start(); + this.master = null; this.masterChecked = false; } @@ -277,20 +277,6 @@ return this.pause * HConstants.RETRY_BACKOFF[ntries]; } - // Used by master and region servers during safe mode only - public void unsetRootRegionLocation() { - this.rootRegionLocation = null; - } - - // Used by master and 
region servers during safe mode only - public void setRootRegionLocation(HRegionLocation rootRegion) { - if (rootRegion == null) { - throw new IllegalArgumentException( - "Cannot set root region location to null."); - } - this.rootRegionLocation = rootRegion; - } - public HMasterInterface getMaster() throws MasterNotRunningException, ZooKeeperConnectionException { @@ -528,15 +514,14 @@ } if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)) { - synchronized (rootRegionLock) { - // This block guards against two threads trying to find the root - // region at the same time. One will go do the find while the - // second waits. The second thread will not do find. - - if (!useCache || rootRegionLocation == null) { - this.rootRegionLocation = locateRootRegion(); - } - return this.rootRegionLocation; + try { + HServerAddress hsa = + this.rootRegionTracker.waitRootRegionLocation(this.rpcTimeout); + if (hsa == null) return null; + return new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, hsa); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return null; } } else if (Bytes.equals(tableName, HConstants.META_TABLE_NAME)) { return locateRegionInMeta(HConstants.ROOT_TABLE_NAME, tableName, row, @@ -923,8 +908,10 @@ throws ZooKeeperConnectionException { if(zooKeeper == null) { try { - zooKeeper = new ZooKeeperWatcher(conf, - ZKUtil.getZooKeeperClusterKey(conf), this); + this.zooKeeper = new ZooKeeperWatcher(conf, + ZKUtil.getZooKeeperClusterKey(conf), this); + LOG.debug("zkw created, sessionid=0x" + + Long.toHexString(this.zooKeeper.getZooKeeper().getSessionId())); } catch (IOException e) { throw new ZooKeeperConnectionException(e); } @@ -932,105 +919,6 @@ return zooKeeper; } - /** - * Repeatedly try to find the root region in ZK - * @return HRegionLocation for root region if found - * @throws NoServerForRegionException - if the root region can not be - * located after retrying - * @throws IOException - */ - private HRegionLocation locateRootRegion() - 
throws IOException { - - // We lazily instantiate the ZooKeeper object because we don't want to - // make the constructor have to throw IOException or handle it itself. - ZooKeeperWatcher zk; - try { - zk = getZooKeeperWatcher(); - } catch (IOException e) { - throw new ZooKeeperConnectionException(e); - } - - HServerAddress rootRegionAddress = null; - for (int tries = 0; tries < numRetries; tries++) { - int localTimeouts = 0; - // ask the master which server has the root region - while (rootRegionAddress == null && localTimeouts < numRetries) { - // Don't read root region until we're out of safe mode so we know - // that the meta regions have been assigned. - try { - rootRegionAddress = ZKUtil.getDataAsAddress(zk, zk.rootServerZNode); - } catch (KeeperException e) { - LOG.error("Unexpected ZooKeeper error attempting to read the root " + - "region server address"); - throw new IOException(e); - } - if (rootRegionAddress == null) { - try { - if (LOG.isDebugEnabled()) { - LOG.debug("Sleeping " + getPauseTime(tries) + - "ms, waiting for root region."); - } - Thread.sleep(getPauseTime(tries)); - } catch (InterruptedException iex) { - // continue - } - localTimeouts++; - } - } - - if (rootRegionAddress == null) { - throw new NoServerForRegionException( - "Timed out trying to locate root region"); - } - - try { - // Get a connection to the region server - HRegionInterface server = getHRegionConnection(rootRegionAddress); - // if this works, then we're good, and we have an acceptable address, - // so we can stop doing retries and return the result. - server.getRegionInfo(HRegionInfo.ROOT_REGIONINFO.getRegionName()); - if (LOG.isDebugEnabled()) { - LOG.debug("Found ROOT at " + rootRegionAddress); - } - break; - } catch (Throwable t) { - t = translateException(t); - - if (tries == numRetries - 1) { - throw new NoServerForRegionException("Timed out trying to locate "+ - "root region because: " + t.getMessage()); - } - - // Sleep and retry finding root region. 
- try { - if (LOG.isDebugEnabled()) { - LOG.debug("Root region location changed. Sleeping."); - } - Thread.sleep(getPauseTime(tries)); - if (LOG.isDebugEnabled()) { - LOG.debug("Wake. Retry finding root region."); - } - } catch (InterruptedException iex) { - // continue - } - } - - rootRegionAddress = null; - } - - // if the address is null by this point, then the retries have failed, - // and we're sort of sunk - if (rootRegionAddress == null) { - throw new NoServerForRegionException( - "unable to locate root region server"); - } - - // return the region location - return new HRegionLocation( - HRegionInfo.ROOT_REGIONINFO, rootRegionAddress); - } - public T getRegionServerWithRetries(ServerCallable callable) throws IOException, RuntimeException { List exceptions = new ArrayList(); @@ -1070,40 +958,6 @@ } } - private HRegionLocation - getRegionLocationForRowWithRetries(byte[] tableName, byte[] rowKey, - boolean reload) - throws IOException { - boolean reloadFlag = reload; - List exceptions = new ArrayList(); - HRegionLocation location = null; - int tries = 0; - for (; tries < numRetries;) { - try { - location = getRegionLocation(tableName, rowKey, reloadFlag); - } catch (Throwable t) { - exceptions.add(t); - } - if (location != null) { - break; - } - reloadFlag = true; - tries++; - try { - Thread.sleep(getPauseTime(tries)); - } catch (InterruptedException e) { - // continue - } - } - if (location == null) { - throw new RetriesExhaustedException(" -- nothing found, no 'location' returned," + - " tableName=" + Bytes.toString(tableName) + - ", reload=" + reload + " --", - HConstants.EMPTY_BYTE_ARRAY, rowKey, tries, exceptions); - } - return location; - } - /** * @deprecated Use HConnectionManager::processBatch instead. 
*/ @@ -1150,6 +1004,12 @@ HBaseRPC.stopProxy(i); } } + if (this.zooKeeper != null) { + LOG.debug("Closed zookeeper sessionid=0x" + + Long.toHexString(this.zooKeeper.getZooKeeper().getSessionId())); + this.zooKeeper.close(); + this.zooKeeper = null; + } } private Callable createCallable( @@ -1412,10 +1272,6 @@ public void abort(final String msg, Throwable t) { if (t != null) LOG.fatal(msg, t); else LOG.fatal(msg); - if(zooKeeper != null) { - zooKeeper.close(); - zooKeeper = null; - } } } } Index: src/main/java/org/apache/hadoop/hbase/client/ServerConnection.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/ServerConnection.java (revision 993550) +++ src/main/java/org/apache/hadoop/hbase/client/ServerConnection.java (working copy) @@ -1,41 +0,0 @@ -/** - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.client; - -import org.apache.hadoop.hbase.HRegionLocation; - -/** - * Used by master and region server, so that they do not need to wait for the - * cluster to be up to get a connection. 
- */ -public interface ServerConnection extends HConnection { - /** - * Set root region location in connection - * @param rootRegion region location for root region - */ - public void setRootRegionLocation(HRegionLocation rootRegion); - - /** - * Unset the root region location in the connection. Called by - * ServerManager.processRegionClose. - */ - public void unsetRootRegionLocation(); -}