Index: src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 958800) +++ src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -693,7 +693,9 @@ */ public void expireMasterSession() throws Exception { HMaster master = hbaseCluster.getMaster(); - expireSession(master.getZooKeeperWrapper()); + ZooKeeperWrapper zkw = + ZooKeeperWrapper.getInstance(conf, master.getHServerAddress().toString()); + expireSession(zkw); } /** Index: src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (revision 958800) +++ src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (working copy) @@ -157,7 +157,7 @@ int regionCount = getRegionCount(); List servers = getOnlineRegionServers(); - double avg = cluster.getMaster().getAverageLoad(); + double avg = cluster.getMaster().getServerManager().getAverageLoad(); int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop)); int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1; LOG.debug("There are " + servers.size() + " servers and " + regionCount Index: src/main/java/org/apache/hadoop/hbase/ServerStatus.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ServerStatus.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/ServerStatus.java (working copy) @@ -37,14 +37,4 @@ * Get the configuration object for this server. */ public Configuration getConfiguration(); - - // TODO: make sure the functions below work on the region server also, or get - // moved to the MasterStatus interface - public AtomicBoolean getShutdownRequested(); - - public AtomicBoolean getClosed(); - - public boolean isClosed(); - - public void shutdown(); } Index: src/main/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -126,6 +126,15 @@ /** Parameter name for how often threads should wake up */ public static final String THREAD_WAKE_FREQUENCY = "hbase.server.thread.wakefrequency"; + /** Default value for thread wake frequency */ + public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000; + + /** Number of retries for the client */ + public static final String NUM_CLIENT_RETRIES = "hbase.client.retries.number"; + + /** Default number of retries for the client */ + public static final int DEFAULT_NUM_CLIENT_RETRIES = 2; + /** Parameter name for how often a region should should perform a major compaction */ public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction"; Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -75,6 +75,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.NavigableSet; import java.util.Random; import java.util.Set; Index: src/main/java/org/apache/hadoop/hbase/master/MasterStatus.java 
=================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterStatus.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/MasterStatus.java (working copy) @@ -19,22 +19,10 @@ */ package org.apache.hadoop.hbase.master; -import java.io.IOException; -import java.util.List; -import java.util.Set; -import java.util.SortedMap; -import java.util.concurrent.locks.Lock; +import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.ServerStatus; -import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ServerConnection; -import org.apache.hadoop.hbase.ipc.HRegionInterface; -import org.apache.hadoop.hbase.master.metrics.MasterMetrics; -import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; /** * These are the set of functions implemented by the HMaster and accessed by @@ -45,48 +33,45 @@ */ public interface MasterStatus extends ServerStatus { + /** + * Return the server manager for region server related info + */ public ServerManager getServerManager(); + /** + * Return the region manager for region related info + */ public RegionManager getRegionManager(); + + /** + * Return the file system manager for dealing with FS related stuff + */ + public FileSystemManager getFileSystemManager(); + /** + * Is this the master that is starting the cluster up? If true, yes. + * Otherwise this is a failed over master. + */ public boolean isClusterStartup(); - - public FileSystem getFileSystem(); - - public Path getOldLogDir(); - - public ZooKeeperWrapper getZooKeeperWrapper(); - - public void startShutdown(); - - public MasterMetrics getMetrics(); - - public RegionServerOperationQueue getRegionServerOperationQueue(); - + + /** + * Return the server RPC connection + */ public ServerConnection getServerConnection(); - public boolean checkFileSystem(); + // TODO: the semantics of the following methods should be defined. 
Once that + // is clear, most of these should move to server status - public int getThreadWakeFrequency(); - - public int getNumRetries(); - - public void deleteEmptyMetaRows(HRegionInterface s, - byte [] metaRegionName, - List emptyRows); - - public HRegionInfo getHRegionInfo(final byte [] row, final Result res) throws IOException; - - public Path getRootDir(); - - public Lock getSplitLogLock(); - - public int numServers(); - - public void getLightServers(final HServerLoad l, - SortedMap> m); - - public SortedMap> getLoadToServers(); - - public double getAverageLoad(); + // start shutting down the server + public void startShutdown(); + // is a shutdown requested + public AtomicBoolean getShutdownRequested(); + // sets the closed variable in the master to true + public void setClosed(); + // returns the closed atomic boolean + public AtomicBoolean getClosed(); + // returns the boolean value of the closed atomic boolean + public boolean isClosed(); + // is the server shutdown + public void shutdown(); } Index: src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/MetaScanner.java (working copy) @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hbase.master; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.RemoteExceptionHandler; import java.io.IOException; @@ -44,6 +45,8 @@ private final List metaRegionsToRescan = new ArrayList(); + + protected static int threadWakeFrequency; /** * Constructor @@ -52,6 +55,7 @@ */ public MetaScanner(MasterStatus masterStatus) { super(masterStatus, false, masterStatus.getShutdownRequested()); + threadWakeFrequency = masterStatus.getConfiguration().getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); } // Don't retry if we get an error while scanning. Errors are most often @@ -86,7 +90,7 @@ return false; } // Make sure the file system is still available - this.masterStatus.checkFileSystem(); + this.masterStatus.getFileSystemManager().checkFileSystem(); } catch (Exception e) { // If for some reason we get some other kind of exception, // at least log it rather than go out silently. 
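
The patched MetaScanner above reads its wake frequency straight from the Configuration instead of calling MasterStatus.getThreadWakeFrequency(). A minimal sketch of that lookup pattern, using the defaults this patch adds to HConstants (the MasterTimingSettings class name is illustrative, not part of the patch; MetaScanner itself passes a literal 10 * 1000 default, while RetryableMetaOperation uses the new constants):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;

public class MasterTimingSettings {
  private final int threadWakeFrequency;
  private final int numRetries;

  public MasterTimingSettings(Configuration conf) {
    // Same lookups the patched scanner/retry helpers perform, falling back
    // to the defaults introduced in HConstants by this patch.
    this.threadWakeFrequency = conf.getInt(
        HConstants.THREAD_WAKE_FREQUENCY,
        HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
    this.numRetries = conf.getInt(
        HConstants.NUM_CLIENT_RETRIES,
        HConstants.DEFAULT_NUM_CLIENT_RETRIES);
  }

  public int getThreadWakeFrequency() {
    return this.threadWakeFrequency;
  }

  public int getNumRetries() {
    return this.numRetries;
  }
}
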
@@ -102,7 +106,7 @@ (region == null && metaRegionsToScan.size() > 0) && !metaRegionsScanned()) { try { - region = metaRegionsToScan.poll(this.masterStatus.getThreadWakeFrequency(), + region = metaRegionsToScan.poll(this.threadWakeFrequency, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { // continue @@ -164,7 +168,7 @@ } } try { - wait(this.masterStatus.getThreadWakeFrequency()); + wait(this.threadWakeFrequency); } catch (InterruptedException e) { // continue } Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -36,8 +36,10 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.master.RegionManager.RegionState; +import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; import org.apache.zookeeper.Watcher.Event.EventType; @@ -86,6 +88,8 @@ new ConcurrentHashMap(); private MasterStatus masterStatus; + private RegionServerOperationQueue regionServerOperationQueue; + private MasterMetrics masterMetrics; /* The regionserver will not be assigned or asked close regions if it * is currently opening >= this many regions. @@ -138,8 +142,11 @@ * Constructor. * @param masterStatus */ - public ServerManager(MasterStatus masterStatus) { + public ServerManager(MasterStatus masterStatus, MasterMetrics masterMetrics, + RegionServerOperationQueue regionServerOperationQueue) { this.masterStatus = masterStatus; + this.masterMetrics = masterMetrics; + this.regionServerOperationQueue = regionServerOperationQueue; Configuration c = masterStatus.getConfiguration(); this.nobalancingCount = c.getInt("hbase.regions.nobalancing.count", 4); int metaRescanInterval = c.getInt("hbase.master.meta.thread.rescanfrequency", @@ -153,7 +160,8 @@ this.oldLogCleaner = new OldLogsCleaner( c.getInt("hbase.master.meta.thread.rescanfrequency",60 * 1000), this.masterStatus.getShutdownRequested(), c, - masterStatus.getFileSystem(), masterStatus.getOldLogDir()); + masterStatus.getFileSystemManager().getFileSystem(), + masterStatus.getFileSystemManager().getOldLogDir()); Threads.setDaemonThreadRunning(oldLogCleaner, n + ".oldLogCleaner"); @@ -238,7 +246,10 @@ // We must set this watcher here because it can be set on a fresh start // or on a failover Watcher watcher = new ServerExpirer(new HServerInfo(info)); - this.masterStatus.getZooKeeperWrapper().updateRSLocationGetWatch(info, watcher); + ZooKeeperWrapper zkw = ZooKeeperWrapper.getInstance( + masterStatus.getConfiguration(), + masterStatus.getHServerAddress().toString()); + zkw.updateRSLocationGetWatch(info, watcher); this.serversToServerInfo.put(serverName, info); this.serversToLoad.put(serverName, load); synchronized (this.loadToServers) { @@ -403,7 +414,7 @@ this.serversToServerInfo.put(serverInfo.getServerName(), serverInfo); HServerLoad load = this.serversToLoad.get(serverInfo.getServerName()); if (load != null) { - this.masterStatus.getMetrics().incrementRequests(load.getNumberOfRequests()); + masterMetrics.incrementRequests(load.getNumberOfRequests()); if (!load.equals(serverInfo.getLoad())) { 
updateLoadToServers(serverInfo.getServerName(), load); } @@ -450,8 +461,7 @@ LOG.info("Processing " + incomingMsgs[i] + " from " + serverInfo.getServerName() + "; " + (i + 1) + " of " + incomingMsgs.length); - if (!this.masterStatus.getRegionServerOperationQueue(). - process(serverInfo, incomingMsgs[i])) { + if (!regionServerOperationQueue.process(serverInfo, incomingMsgs[i])) { continue; } switch (incomingMsgs[i].getType()) { @@ -623,7 +633,7 @@ this.masterStatus.getRegionManager().setOpen(region.getRegionNameAsString()); RegionServerOperation op = new ProcessRegionOpen(masterStatus, serverInfo, region); - this.masterStatus.getRegionServerOperationQueue().put(op); + regionServerOperationQueue.put(op); } } } @@ -662,7 +672,7 @@ this.masterStatus.getRegionManager().setClosed(region.getRegionNameAsString()); RegionServerOperation op = new ProcessRegionClose(masterStatus, region, offlineRegion, reassignRegion); - this.masterStatus.getRegionServerOperationQueue().put(op); + regionServerOperationQueue.put(op); } } @@ -801,7 +811,7 @@ * a MSG_REGIONSERVER_STOP. */ void letRegionServersShutdown() { - if (!masterStatus.checkFileSystem()) { + if (!masterStatus.getFileSystemManager().checkFileSystem()) { // Forget waiting for the region servers if the file system has gone // away. Just exit as quickly as possible. return; @@ -870,8 +880,7 @@ LOG.debug("Added=" + serverName + " to dead servers, added shutdown processing operation"); this.deadServers.add(serverName); - this.masterStatus.getRegionServerOperationQueue(). - put(new ProcessServerShutdown(masterStatus, info)); + regionServerOperationQueue.put(new ProcessServerShutdown(masterStatus, info)); } /** @@ -933,4 +942,8 @@ public void setMinimumServerCount(int minimumServerCount) { this.minimumServerCount = minimumServerCount; } + + public RegionServerOperationQueue getRegionServerOperationQueue() { + return this.regionServerOperationQueue; + } } Index: src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (working copy) @@ -117,7 +117,7 @@ } ZooKeeperWrapper zkWrapper = ZooKeeperWrapper.getInstance(masterStatus.getConfiguration(), - HMaster.class.getName()); + masterStatus.getHServerAddress().toString()); zkWrapper.deleteUnassignedRegion(regionInfo.getEncodedName()); return true; } Index: src/main/java/org/apache/hadoop/hbase/master/TableOperation.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/TableOperation.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/TableOperation.java (working copy) @@ -94,7 +94,7 @@ if (values == null || values.isEmpty()) { break; } - HRegionInfo info = this.masterStatus.getHRegionInfo(values.getRow(), values); + HRegionInfo info = RegionManager.getHRegionInfo(values.getRow(), values); if (info == null) { emptyRows.add(values.getRow()); LOG.error(Bytes.toString(HConstants.CATALOG_FAMILY) + ":" @@ -137,7 +137,7 @@ LOG.warn("Found " + emptyRows.size() + " rows with empty HRegionInfo while scanning meta region " + Bytes.toString(m.getRegionName())); - masterStatus.deleteEmptyMetaRows(server, m.getRegionName(), emptyRows); + RegionManager.deleteEmptyMetaRows(server, m.getRegionName(), emptyRows); } if (!tableExists) { Index: src/main/java/org/apache/hadoop/hbase/master/TableDelete.java 
=================================================================== --- src/main/java/org/apache/hadoop/hbase/master/TableDelete.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/TableDelete.java (working copy) @@ -61,8 +61,8 @@ // Delete the region try { HRegion.removeRegionFromMETA(server, m.getRegionName(), i.getRegionName()); - HRegion.deleteRegion(this.master.getFileSystem(), - this.master.getRootDir(), i); + HRegion.deleteRegion(this.master.getFileSystemManager().getFileSystem(), + this.master.getFileSystemManager().getRootDir(), i); } catch (IOException e) { LOG.error("failed to delete region " + Bytes.toString(i.getRegionName()), @@ -71,7 +71,8 @@ } // delete the table's folder from fs. - this.master.getFileSystem().delete(new Path(this.master.getRootDir(), - Bytes.toString(this.tableName)), true); + this.master.getFileSystemManager().getFileSystem().delete( + new Path(this.master.getFileSystemManager().getRootDir(), + Bytes.toString(this.tableName)), true); } } \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java (working copy) @@ -22,6 +22,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.RemoteExceptionHandler; import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; @@ -44,20 +45,29 @@ protected final Sleeper sleeper; protected final MetaRegion m; protected final MasterStatus masterStatus; + protected static int numRetries; + protected static int threadWakeFrequency; protected HRegionInterface server; protected RetryableMetaOperation(MetaRegion m, MasterStatus masterStatus) { this.m = m; this.masterStatus = masterStatus; - this.sleeper = new Sleeper(this.masterStatus.getThreadWakeFrequency(), - this.masterStatus.getClosed()); + threadWakeFrequency = + masterStatus.getConfiguration().getInt( + HConstants.THREAD_WAKE_FREQUENCY, + HConstants.DEFAULT_THREAD_WAKE_FREQUENCY); + this.sleeper = new Sleeper(threadWakeFrequency, this.masterStatus.getClosed()); + numRetries = + masterStatus.getConfiguration().getInt( + HConstants.NUM_CLIENT_RETRIES, + HConstants.DEFAULT_NUM_CLIENT_RETRIES); } protected T doWithRetries() throws IOException, RuntimeException { List exceptions = new ArrayList(); - for (int tries = 0; tries < this.masterStatus.getNumRetries(); tries++) { + for (int tries = 0; tries < numRetries; tries++) { if (this.masterStatus.isClosed()) { return null; } @@ -74,7 +84,7 @@ if (e instanceof RemoteException) { e = RemoteExceptionHandler.decodeRemoteException((RemoteException) e); } - if (tries == this.masterStatus.getNumRetries() - 1) { + if (tries == numRetries - 1) { if (LOG.isDebugEnabled()) { StringBuilder message = new StringBuilder( "Trying to contact region server for regionName '" + @@ -86,7 +96,7 @@ } LOG.debug(message); } - this.masterStatus.checkFileSystem(); + this.masterStatus.getFileSystemManager().checkFileSystem(); throw e; } if (LOG.isDebugEnabled()) { Index: src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (revision 
958800) +++ src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java (working copy) @@ -56,7 +56,7 @@ throws IOException { this.serverName = masterStatus.getHServerAddress().toString(); this.serverManager = masterStatus.getServerManager(); - zkWrapper = ZooKeeperWrapper.getInstance(conf, HMaster.class.getName()); + zkWrapper = ZooKeeperWrapper.getInstance(conf, masterStatus.getHServerAddress().toString()); String unassignedZNode = zkWrapper.getRegionInTransitionZNode(); // If the UNASSIGNED ZNode exists and this is a fresh cluster start, then Index: src/main/java/org/apache/hadoop/hbase/master/RegionServerOperation.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/RegionServerOperation.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/RegionServerOperation.java (working copy) @@ -71,7 +71,7 @@ } protected void requeue() { - this.masterStatus.getRegionServerOperationQueue().putOnDelayQueue(this); + masterStatus.getServerManager().getRegionServerOperationQueue().putOnDelayQueue(this); } private long whenToExpire() { Index: src/main/java/org/apache/hadoop/hbase/master/RootScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/RootScanner.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/RootScanner.java (working copy) @@ -59,7 +59,7 @@ e = RemoteExceptionHandler.checkIOException(e); LOG.warn("Scan ROOT region", e); // Make sure the file system is still available - masterStatus.checkFileSystem(); + masterStatus.getFileSystemManager().checkFileSystem(); } catch (Exception e) { // If for some reason we get some other kind of exception, // at least log it rather than go out silently. Index: src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (working copy) @@ -78,7 +78,7 @@ this.logSplit = false; this.rootRescanned = false; this.rsLogDir = - new Path(masterStatus.getRootDir(), HLog.getHLogDirectoryName(serverInfo)); + new Path(masterStatus.getFileSystemManager().getRootDir(), HLog.getHLogDirectoryName(serverInfo)); // check to see if I am responsible for either ROOT or any of the META tables. 
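
ProcessServerShutdown, whose diff continues below, now reaches the filesystem, the root directory, and the split-log lock through the new FileSystemManager rather than through methods on MasterStatus. A sketch of that access pattern under the patched interfaces (DeadServerLogSplitter and its method are hypothetical helpers, not part of the patch):

package org.apache.hadoop.hbase.master;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.regionserver.wal.HLog;

public class DeadServerLogSplitter {

  /** Split a dead region server's log directory, mirroring the patched flow. */
  public static void splitLogIfPresent(MasterStatus masterStatus, Path rsLogDir)
      throws IOException {
    FileSystemManager fsm = masterStatus.getFileSystemManager();
    FileSystem fs = fsm.getFileSystem();
    if (!fs.exists(rsLogDir)) {
      return;   // nothing to split for this server
    }
    // Log splitting is serialized across operations via the manager's lock.
    if (!fsm.getSplitLogLock().tryLock()) {
      return;   // another operation holds the lock; the caller can requeue
    }
    try {
      HLog.splitLog(fsm.getRootDir(), rsLogDir, fsm.getOldLogDir(), fs,
          masterStatus.getConfiguration());
    } finally {
      fsm.getSplitLogLock().unlock();
    }
  }
}
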
@@ -177,7 +177,7 @@ Bytes.toString(row)); } - HRegionInfo info = masterStatus.getHRegionInfo(row, values); + HRegionInfo info = RegionManager.getHRegionInfo(row, values); if (info == null) { emptyRows.add(row); continue; @@ -228,7 +228,7 @@ LOG.warn("Found " + emptyRows.size() + " rows with empty HRegionInfo while scanning meta region " + Bytes.toString(regionName)); - masterStatus.deleteEmptyMetaRows(server, regionName, emptyRows); + RegionManager.deleteEmptyMetaRows(server, regionName, emptyRows); } // Update server in root/meta entries for (ToDoEntry e: toDoList) { @@ -291,16 +291,17 @@ masterStatus.getRegionManager().numOnlineMetaRegions()); if (!logSplit) { // Process the old log file - if (this.masterStatus.getFileSystem().exists(rsLogDir)) { - if (!masterStatus.getSplitLogLock().tryLock()) { + if (this.masterStatus.getFileSystemManager().getFileSystem().exists(rsLogDir)) { + if (!masterStatus.getFileSystemManager().getSplitLogLock().tryLock()) { return false; } try { - HLog.splitLog(masterStatus.getRootDir(), rsLogDir, - this.masterStatus.getOldLogDir(), this.masterStatus.getFileSystem(), - this.masterStatus.getConfiguration()); + HLog.splitLog(masterStatus.getFileSystemManager().getRootDir(), rsLogDir, + this.masterStatus.getFileSystemManager().getOldLogDir(), + this.masterStatus.getFileSystemManager().getFileSystem(), + this.masterStatus.getConfiguration()); } finally { - masterStatus.getSplitLogLock().unlock(); + masterStatus.getFileSystemManager().getSplitLogLock().unlock(); } } logSplit = true; Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -127,31 +127,24 @@ private final AtomicBoolean shutdownRequested = new AtomicBoolean(false); private final Configuration conf; - private final Path rootdir; private InfoServer infoServer; - private final int threadWakeFrequency; private final int numRetries; // Metrics is set when we call run. private final MasterMetrics metrics; - final Lock splitLogLock = new ReentrantLock(); - // Our zk client. private ZooKeeperWrapper zooKeeperWrapper; // Watcher for master address and for cluster shutdown. private final ZKMasterAddressWatcher zkMasterAddressWatcher; // A Sleeper that sleeps for threadWakeFrequency; sleep if nothing todo. private final Sleeper sleeper; - // Keep around for convenience. - private final FileSystem fs; - // Is the fileystem ok? - private volatile boolean fsOk = true; - // The Path to the old logs dir - private final Path oldLogDir; private final HBaseServer rpcServer; private final HServerAddress address; + + // file system manager for the master FS operations + private final FileSystemManager fileSystemManager; private final ServerConnection connection; private final ServerManager serverManager; @@ -172,31 +165,6 @@ public HMaster(Configuration conf) throws IOException { this.conf = conf; - // Figure out if this is a fresh cluster start. This is done by checking the - // number of RS ephemeral nodes. RS ephemeral nodes are created only after - // the primary master has written the address to ZK. So this has to be done - // before we race to write our address to zookeeper. 
- zooKeeperWrapper = ZooKeeperWrapper.createInstance(conf, HMaster.class.getName()); - isClusterStartup = (zooKeeperWrapper.scanRSDirectory().size() == 0); - - // Set filesystem to be that of this.rootdir else we get complaints about - // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is - // default localfs. Presumption is that rootdir is fully-qualified before - // we get to here with appropriate fs scheme. - this.rootdir = FSUtils.getRootDir(this.conf); - // Cover both bases, the old way of setting default fs and the new. - // We're supposed to run on 0.20 and 0.21 anyways. - this.conf.set("fs.default.name", this.rootdir.toString()); - this.conf.set("fs.defaultFS", this.rootdir.toString()); - this.fs = FileSystem.get(this.conf); - checkRootDir(this.rootdir, this.conf, this.fs); - - // Make sure the region servers can archive their old logs - this.oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); - if(!this.fs.exists(this.oldLogDir)) { - this.fs.mkdirs(this.oldLogDir); - } - // Get my address and create an rpc server instance. The rpc-server port // can be ephemeral...ensure we have the correct info HServerAddress a = new HServerAddress(getMyAddress(this.conf)); @@ -206,12 +174,23 @@ this.address = new HServerAddress(this.rpcServer.getListenerAddress()); this.numRetries = conf.getInt("hbase.client.retries.number", 2); - this.threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, - 10 * 1000); - - this.sleeper = new Sleeper(this.threadWakeFrequency, this.closed); + int threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); + this.sleeper = new Sleeper(threadWakeFrequency, this.closed); this.connection = ServerConnectionManager.getConnection(conf); + // Figure out if this is a fresh cluster start. This is done by checking the + // number of RS ephemeral nodes. RS ephemeral nodes are created only after + // the primary master has written the address to ZK. So this has to be done + // before we race to write our address to zookeeper. + zooKeeperWrapper = ZooKeeperWrapper.createInstance(conf, getHServerAddress().toString()); + isClusterStartup = (zooKeeperWrapper.scanRSDirectory().size() == 0); + + // Create the filesystem manager, which in turn does the following: + // - Creates the root hbase directory in the FS + // - Checks the FS to make sure the root directory is readable + // - Creates the archive directory for logs + fileSystemManager = new FileSystemManager(conf, this); + // Get our zookeeper wrapper and then try to write our address to zookeeper. // We'll succeed if we are only master or if we win the race when many // masters. Otherwise we park here inside in writeAddressToZooKeeper. @@ -224,8 +203,13 @@ this.regionServerOperationQueue = new RegionServerOperationQueue(this.conf, this.closed); - serverManager = new ServerManager(this); + // set the thread name + setName(MASTER); + // create the master metrics object + this.metrics = new MasterMetrics(MASTER); + serverManager = new ServerManager(this, metrics, regionServerOperationQueue); + // Start the unassigned watcher - which will create the unassigned region // in ZK. This is needed before RegionManager() constructor tries to assign @@ -240,8 +224,6 @@ // start the region manager regionManager = new RegionManager(this); - setName(MASTER); - this.metrics = new MasterMetrics(MASTER); // We're almost open for business this.closed.set(false); LOG.info("HMaster initialized on " + this.address.toString()); @@ -264,77 +246,6 @@ } /* - * Get the rootdir. 
Make sure its wholesome and exists before returning. - * @param rd - * @param conf - * @param fs - * @return hbase.rootdir (after checks for existence and bootstrapping if - * needed populating the directory with necessary bootup files). - * @throws IOException - */ - private static Path checkRootDir(final Path rd, final Configuration c, - final FileSystem fs) - throws IOException { - // If FS is in safe mode wait till out of it. - FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, - 10 * 1000)); - // Filesystem is good. Go ahead and check for hbase.rootdir. - if (!fs.exists(rd)) { - fs.mkdirs(rd); - FSUtils.setVersion(fs, rd); - } else { - FSUtils.checkVersion(fs, rd, true); - } - // Make sure the root region directory exists! - if (!FSUtils.rootRegionExists(fs, rd)) { - bootstrap(rd, c); - } - return rd; - } - - private static void bootstrap(final Path rd, final Configuration c) - throws IOException { - LOG.info("BOOTSTRAP: creating ROOT and first META regions"); - try { - // Bootstrapping, make sure blockcache is off. Else, one will be - // created here in bootstap and it'll need to be cleaned up. Better to - // not make it in first place. Turn off block caching for bootstrap. - // Enable after. - HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO); - setInfoFamilyCaching(rootHRI, false); - HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO); - setInfoFamilyCaching(metaHRI, false); - HRegion root = HRegion.createHRegion(rootHRI, rd, c); - HRegion meta = HRegion.createHRegion(metaHRI, rd, c); - setInfoFamilyCaching(rootHRI, true); - setInfoFamilyCaching(metaHRI, true); - // Add first region from the META table to the ROOT region. - HRegion.addRegionToMETA(root, meta); - root.close(); - root.getLog().closeAndDelete(); - meta.close(); - meta.getLog().closeAndDelete(); - } catch (IOException e) { - e = RemoteExceptionHandler.checkIOException(e); - LOG.error("bootstrap", e); - throw e; - } - } - - /* - * @param hri Set all family block caching to b - * @param b - */ - private static void setInfoFamilyCaching(final HRegionInfo hri, final boolean b) { - for (HColumnDescriptor hcd: hri.getTableDesc().families.values()) { - if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) { - hcd.setBlockCacheEnabled(b); - hcd.setInMemory(b); - } - } - } - - /* * @return This masters' address. * @throws UnknownHostException */ @@ -348,24 +259,6 @@ return s; } - /** - * Checks to see if the file system is still accessible. - * If not, sets closed - * @return false if file system is not available - */ - public boolean checkFileSystem() { - if (this.fsOk) { - try { - FSUtils.checkFileSystemAvailable(this.fs); - } catch (IOException e) { - LOG.fatal("Shutting down HBase cluster: file system not available", e); - this.closed.set(true); - this.fsOk = false; - } - } - return this.fsOk; - } - /** @return HServerAddress of the master server */ public HServerAddress getMasterAddress() { return this.address; @@ -379,27 +272,15 @@ public InfoServer getInfoServer() { return this.infoServer; } - + /** - * @return HBase root dir. - * @throws IOException + * Return the file systen manager instance */ - public Path getRootDir() { - return this.rootdir; + public FileSystemManager getFileSystemManager() { + return fileSystemManager; } - public int getNumRetries() { - return this.numRetries; - } - /** - * @return Server metrics - */ - public MasterMetrics getMetrics() { - return this.metrics; - } - - /** * @return Return configuration being used by this server. 
*/ public Configuration getConfiguration() { @@ -414,18 +295,14 @@ return this.regionManager; } - public int getThreadWakeFrequency() { - return this.threadWakeFrequency; - } - - public FileSystem getFileSystem() { - return this.fs; - } - public AtomicBoolean getShutdownRequested() { return this.shutdownRequested; } + public void setClosed() { + this.closed.set(true); + } + public AtomicBoolean getClosed() { return this.closed; } @@ -439,49 +316,23 @@ } /** - * Get the ZK wrapper object + * Get the ZK wrapper object - needed by master_jsp.java * @return the zookeeper wrapper */ public ZooKeeperWrapper getZooKeeperWrapper() { return this.zooKeeperWrapper; } - - // These methods are so don't have to pollute RegionManager with ServerManager. - public SortedMap> getLoadToServers() { - return this.serverManager.getLoadToServers(); + /** + * Get the HBase root dir - needed by master_jsp.java + */ + public Path getRootDir() { + return fileSystemManager.getRootDir(); } - - public int numServers() { - return this.serverManager.numServers(); - } - - public double getAverageLoad() { - return this.serverManager.getAverageLoad(); - } - + public RegionServerOperationQueue getRegionServerOperationQueue() { return this.regionServerOperationQueue; } - /** - * Get the directory where old logs go - * @return the dir - */ - public Path getOldLogDir() { - return this.oldLogDir; - } - - /** - * Add to the passed m servers that are loaded less than - * l. - * @param l - * @param m - */ - public void getLightServers(final HServerLoad l, - SortedMap> m) { - this.serverManager.getLightServers(l, m); - } - /** Main processing loop */ @Override public void run() { @@ -508,7 +359,7 @@ // If FAILED op processing, bad. Exit. break FINISHED; case REQUEUED_BUT_PROBLEM: - if (!checkFileSystem()) + if (!fileSystemManager.checkFileSystem()) // If bad filesystem, exit. break FINISHED; default: @@ -518,7 +369,7 @@ } } catch (Throwable t) { LOG.fatal("Unhandled exception. Starting shutdown.", t); - this.closed.set(true); + setClosed(); } // Wait for all the remaining region servers to report in. @@ -555,7 +406,7 @@ // Check if this is a fresh start of the cluster if (addresses.isEmpty()) { LOG.debug("Master fresh start, proceeding with normal startup"); - splitLogAfterStartup(); + fileSystemManager.splitLogAfterStartup(); return; } // Failover case. @@ -596,58 +447,10 @@ } LOG.info("Inspection found " + assignedRegions.size() + " regions, " + (isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA")); - splitLogAfterStartup(); + fileSystemManager.splitLogAfterStartup(); } /* - * Inspect the log directory to recover any log file without - * ad active region server. 
- */ - private void splitLogAfterStartup() { - Path logsDirPath = - new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); - try { - if (!this.fs.exists(logsDirPath)) return; - } catch (IOException e) { - throw new RuntimeException("Could exists for " + logsDirPath, e); - } - FileStatus[] logFolders; - try { - logFolders = this.fs.listStatus(logsDirPath); - } catch (IOException e) { - throw new RuntimeException("Failed listing " + logsDirPath.toString(), e); - } - if (logFolders == null || logFolders.length == 0) { - LOG.debug("No log files to split, proceeding..."); - return; - } - for (FileStatus status : logFolders) { - String serverName = status.getPath().getName(); - LOG.info("Found log folder : " + serverName); - if(this.serverManager.getServerInfo(serverName) == null) { - LOG.info("Log folder doesn't belong " + - "to a known region server, splitting"); - this.splitLogLock.lock(); - Path logDir = - new Path(this.rootdir, HLog.getHLogDirectoryName(serverName)); - try { - HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, getConfiguration()); - } catch (IOException e) { - LOG.error("Failed splitting " + logDir.toString(), e); - } finally { - this.splitLogLock.unlock(); - } - } else { - LOG.info("Log folder belongs to an existing region server"); - } - } - } - - public Lock getSplitLogLock() { - return splitLogLock; - } - - /* * Start up all services. If any of these threads gets an unhandled exception * then they just die with a logged message. This should be fine because * in general, we do not expect the master to get such unhandled exceptions @@ -680,7 +483,7 @@ } } // Something happened during startup. Shut things down. - this.closed.set(true); + setClosed(); LOG.error("Failed startup", e); } } @@ -689,7 +492,7 @@ * Start shutting down the master */ public void startShutdown() { - this.closed.set(true); + setClosed(); this.regionManager.stopScanners(); this.regionServerOperationQueue.shutdown(); this.serverManager.notifyServers(); @@ -1053,7 +856,7 @@ } // Need hri Result rr = getFromMETA(regionname, HConstants.CATALOG_FAMILY); - HRegionInfo hri = getHRegionInfo(rr.getRow(), rr); + HRegionInfo hri = RegionManager.getHRegionInfo(rr.getRow(), rr); if (hostnameAndPort == null) { // Get server from the .META. if it wasn't passed as argument hostnameAndPort = @@ -1095,64 +898,6 @@ return status; } - // TODO ryan rework this function - /* - * Get HRegionInfo from passed META map of row values. - * Returns null if none found (and logs fact that expected COL_REGIONINFO - * was missing). Utility method used by scanners of META tables. - * @param row name of the row - * @param map Map to do lookup in. - * @return Null or found HRegionInfo. 
- * @throws IOException - */ - public HRegionInfo getHRegionInfo(final byte [] row, final Result res) - throws IOException { - byte[] regioninfo = res.getValue(HConstants.CATALOG_FAMILY, - HConstants.REGIONINFO_QUALIFIER); - if (regioninfo == null) { - StringBuilder sb = new StringBuilder(); - NavigableMap infoMap = - res.getFamilyMap(HConstants.CATALOG_FAMILY); - for (byte [] e: infoMap.keySet()) { - if (sb.length() > 0) { - sb.append(", "); - } - sb.append(Bytes.toString(HConstants.CATALOG_FAMILY) + ":" - + Bytes.toString(e)); - } - LOG.warn(Bytes.toString(HConstants.CATALOG_FAMILY) + ":" + - Bytes.toString(HConstants.REGIONINFO_QUALIFIER) - + " is empty for row: " + Bytes.toString(row) + "; has keys: " - + sb.toString()); - return null; - } - return Writables.getHRegionInfo(regioninfo); - } - - /* - * When we find rows in a meta region that has an empty HRegionInfo, we - * clean them up here. - * - * @param s connection to server serving meta region - * @param metaRegionName name of the meta region we scanned - * @param emptyRows the row keys that had empty HRegionInfos - */ - public void deleteEmptyMetaRows(HRegionInterface s, - byte [] metaRegionName, - List emptyRows) { - for (byte [] regionName: emptyRows) { - try { - HRegion.removeRegionFromMETA(s, metaRegionName, regionName); - LOG.warn("Removed region: " + Bytes.toString(regionName) + - " from meta region: " + - Bytes.toString(metaRegionName) + " because HRegionInfo was empty"); - } catch (IOException e) { - LOG.error("deleting region: " + Bytes.toString(regionName) + - " from meta region: " + Bytes.toString(metaRegionName), e); - } - } - } - /** * @see org.apache.zookeeper.Watcher#process(org.apache.zookeeper.WatchedEvent) */ Index: src/main/java/org/apache/hadoop/hbase/master/RegionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/RegionManager.java (working copy) @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.RegionTransitionEventData; import org.apache.hadoop.hbase.executor.HBaseEventHandler.HBaseEventType; import org.apache.hadoop.hbase.ipc.HRegionInterface; @@ -80,6 +81,8 @@ private static final byte[] OVERLOADED = Bytes.toBytes("Overloaded"); private static final byte [] META_REGION_PREFIX = Bytes.toBytes(".META.,"); + + private static int threadWakeFrequency; /** * Map of region name to RegionState for regions that are in transition such as @@ -133,8 +136,12 @@ Configuration conf = masterStatus.getConfiguration(); this.masterStatus = masterStatus; + threadWakeFrequency = + masterStatus.getConfiguration().getInt( + HConstants.THREAD_WAKE_FREQUENCY, + HConstants.DEFAULT_THREAD_WAKE_FREQUENCY); this.zkWrapper = - ZooKeeperWrapper.getInstance(conf, HMaster.class.getName()); + ZooKeeperWrapper.getInstance(conf, masterStatus.getHServerAddress().toString()); this.maxAssignInOneGo = conf.getInt("hbase.regions.percheckin", 10); this.loadBalancer = new LoadBalancer(conf); @@ -200,7 +207,7 @@ void assignRegions(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList returnMsgs) { HServerLoad thisServersLoad = info.getLoad(); - boolean isSingleServer = this.masterStatus.numServers() == 1; + boolean isSingleServer = this.masterStatus.getServerManager().numServers() == 1; // figure 
out what regions need to be assigned and aren't currently being // worked on elsewhere. @@ -361,7 +368,7 @@ final HServerLoad thisServersLoad) { SortedMap> lightServers = new TreeMap>(); - this.masterStatus.getLightServers(thisServersLoad, lightServers); + this.masterStatus.getServerManager().getLightServers(thisServersLoad, lightServers); // Examine the list of servers that are more lightly loaded than this one. // Pretend that we will assign regions to these more lightly loaded servers // until they reach load equal with ours. Then, see how many regions are left @@ -454,9 +461,9 @@ SortedMap> heavyServers = new TreeMap>(); - synchronized (masterStatus.getLoadToServers()) { + synchronized (masterStatus.getServerManager().getLoadToServers()) { heavyServers.putAll( - masterStatus.getLoadToServers().tailMap(referenceLoad)); + masterStatus.getServerManager().getLoadToServers().tailMap(referenceLoad)); } int nservers = 0; for (Map.Entry> e : heavyServers.entrySet()) { @@ -555,12 +562,15 @@ public int countRegionsOnFS() throws IOException { int regions = 0; FileStatus [] tableDirs = - this.masterStatus.getFileSystem().listStatus(this.masterStatus.getRootDir(), new TableDirFilter()); + masterStatus.getFileSystemManager().getFileSystem().listStatus( + this.masterStatus.getFileSystemManager().getRootDir(), new TableDirFilter()); FileStatus[] regionDirs; RegionDirFilter rdf = new RegionDirFilter(); for(FileStatus tabledir : tableDirs) { if(tabledir.isDir()) { - regionDirs = this.masterStatus.getFileSystem().listStatus(tabledir.getPath(), rdf); + regionDirs = + masterStatus.getFileSystemManager().getFileSystem().listStatus( + tabledir.getPath(), rdf); regions += regionDirs.length; } } @@ -630,8 +640,11 @@ } catch(Exception iex) { LOG.warn("meta scanner", iex); } - masterStatus.getZooKeeperWrapper().clearRSDirectory(); - masterStatus.getZooKeeperWrapper().close(); + ZooKeeperWrapper zkw = ZooKeeperWrapper.getInstance( + masterStatus.getConfiguration(), + masterStatus.getHServerAddress().toString()); + zkw.clearRSDirectory(); + zkw.close(); } /** @@ -737,7 +750,7 @@ byte [] metaRegionName) throws IOException { // 2. Create the HRegion - HRegion region = HRegion.createHRegion(newRegion, this.masterStatus.getRootDir(), + HRegion region = HRegion.createHRegion(newRegion, this.masterStatus.getFileSystemManager().getRootDir(), masterStatus.getConfiguration()); // 3. Insert into meta @@ -1179,7 +1192,7 @@ // allocated the ROOT region below. try { // Cycle rather than hold here in case master is closed meantime. 
- rootRegionLocation.wait(this.masterStatus.getThreadWakeFrequency()); + rootRegionLocation.wait(threadWakeFrequency); } catch (InterruptedException e) { // continue } @@ -1220,7 +1233,10 @@ private void writeRootRegionLocationToZooKeeper(HServerAddress address) { for (int attempt = 0; attempt < zooKeeperNumRetries; ++attempt) { - if (masterStatus.getZooKeeperWrapper().writeRootRegionLocation(address)) { + ZooKeeperWrapper zkw = ZooKeeperWrapper.getInstance( + masterStatus.getConfiguration(), + masterStatus.getHServerAddress().toString()); + if (zkw.writeRootRegionLocation(address)) { return; } @@ -1394,7 +1410,7 @@ void loadBalancing(HServerInfo info, HRegionInfo[] mostLoadedRegions, ArrayList returnMsgs) { HServerLoad servLoad = info.getLoad(); - double avg = masterStatus.getAverageLoad(); + double avg = masterStatus.getServerManager().getAverageLoad(); // nothing to balance if server load not more then average load if(servLoad.getLoad() <= Math.ceil(avg) || avg <= 2.0) { @@ -1449,7 +1465,7 @@ double avgLoad) { SortedMap> loadToServers = - masterStatus.getLoadToServers(); + masterStatus.getServerManager().getLoadToServers(); // check if server most loaded if (!loadToServers.get(loadToServers.lastKey()).contains(srvName)) return 0; @@ -1684,4 +1700,62 @@ return Bytes.compareTo(getRegionName(), o.getRegionName()); } } + + /* + * When we find rows in a meta region that has an empty HRegionInfo, we + * clean them up here. + * + * @param s connection to server serving meta region + * @param metaRegionName name of the meta region we scanned + * @param emptyRows the row keys that had empty HRegionInfos + */ + public static void deleteEmptyMetaRows(HRegionInterface s, + byte [] metaRegionName, + List emptyRows) { + for (byte [] regionName: emptyRows) { + try { + HRegion.removeRegionFromMETA(s, metaRegionName, regionName); + LOG.warn("Removed region: " + Bytes.toString(regionName) + + " from meta region: " + + Bytes.toString(metaRegionName) + " because HRegionInfo was empty"); + } catch (IOException e) { + LOG.error("deleting region: " + Bytes.toString(regionName) + + " from meta region: " + Bytes.toString(metaRegionName), e); + } + } + } + + // TODO ryan rework this function + /* + * Get HRegionInfo from passed META map of row values. + * Returns null if none found (and logs fact that expected COL_REGIONINFO + * was missing). Utility method used by scanners of META tables. + * @param row name of the row + * @param map Map to do lookup in. + * @return Null or found HRegionInfo. 
+ * @throws IOException + */ + public static HRegionInfo getHRegionInfo(final byte [] row, final Result res) + throws IOException { + byte[] regioninfo = res.getValue(HConstants.CATALOG_FAMILY, + HConstants.REGIONINFO_QUALIFIER); + if (regioninfo == null) { + StringBuilder sb = new StringBuilder(); + NavigableMap infoMap = + res.getFamilyMap(HConstants.CATALOG_FAMILY); + for (byte [] e: infoMap.keySet()) { + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(Bytes.toString(HConstants.CATALOG_FAMILY) + ":" + + Bytes.toString(e)); + } + LOG.warn(Bytes.toString(HConstants.CATALOG_FAMILY) + ":" + + Bytes.toString(HConstants.REGIONINFO_QUALIFIER) + + " is empty for row: " + Bytes.toString(row) + "; has keys: " + + sb.toString()); + return null; + } + return Writables.getHRegionInfo(regioninfo); + } } Index: src/main/java/org/apache/hadoop/hbase/master/FileSystemManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/FileSystemManager.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/FileSystemManager.java (revision 0) @@ -0,0 +1,242 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import java.io.IOException; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * This class abstract a bunch of operations the HMaster needs to interact with + * the underlying file system, including splitting log files, checking file + * system status, etc. + */ +public class FileSystemManager { + private static final Log LOG = LogFactory.getLog(FileSystemManager.class.getName()); + // HBase configuration + Configuration conf; + // master status + MasterStatus masterStatus; + // Keep around for convenience. + private final FileSystem fs; + // Is the fileystem ok? 
+ private volatile boolean fsOk = true; + // The Path to the old logs dir + private final Path oldLogDir; + // root hbase directory on the FS + private final Path rootdir; + // create the split log lock + final Lock splitLogLock = new ReentrantLock(); + + public FileSystemManager(Configuration conf, MasterStatus masterStatus) throws IOException { + this.conf = conf; + this.masterStatus = masterStatus; + // Set filesystem to be that of this.rootdir else we get complaints about + // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is + // default localfs. Presumption is that rootdir is fully-qualified before + // we get to here with appropriate fs scheme. + this.rootdir = FSUtils.getRootDir(conf); + // Cover both bases, the old way of setting default fs and the new. + // We're supposed to run on 0.20 and 0.21 anyways. + conf.set("fs.default.name", this.rootdir.toString()); + conf.set("fs.defaultFS", this.rootdir.toString()); + this.fs = FileSystem.get(conf); + + checkRootDir(this.rootdir, conf, this.fs); + + // Make sure the region servers can archive their old logs + this.oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); + if(!this.fs.exists(this.oldLogDir)) { + this.fs.mkdirs(this.oldLogDir); + } + } + + public FileSystem getFileSystem() { + return this.fs; + } + + /** + * Get the directory where old logs go + * @return the dir + */ + public Path getOldLogDir() { + return this.oldLogDir; + } + + /** + * Checks to see if the file system is still accessible. + * If not, sets closed + * @return false if file system is not available + */ + public boolean checkFileSystem() { + if (this.fsOk) { + try { + FSUtils.checkFileSystemAvailable(this.fs); + } catch (IOException e) { + LOG.fatal("Shutting down HBase cluster: file system not available", e); + masterStatus.setClosed(); + this.fsOk = false; + } + } + return this.fsOk; + } + + /** + * @return HBase root dir. + * @throws IOException + */ + public Path getRootDir() { + return this.rootdir; + } + + public Lock getSplitLogLock() { + return splitLogLock; + } + + /* + * Inspect the log directory to recover any log file without + * ad active region server. + */ + public void splitLogAfterStartup() { + Path logsDirPath = + new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); + try { + if (!this.fs.exists(logsDirPath)) return; + } catch (IOException e) { + throw new RuntimeException("Could exists for " + logsDirPath, e); + } + FileStatus[] logFolders; + try { + logFolders = this.fs.listStatus(logsDirPath); + } catch (IOException e) { + throw new RuntimeException("Failed listing " + logsDirPath.toString(), e); + } + if (logFolders == null || logFolders.length == 0) { + LOG.debug("No log files to split, proceeding..."); + return; + } + for (FileStatus status : logFolders) { + String serverName = status.getPath().getName(); + LOG.info("Found log folder : " + serverName); + if(masterStatus.getServerManager().getServerInfo(serverName) == null) { + LOG.info("Log folder doesn't belong " + + "to a known region server, splitting"); + this.splitLogLock.lock(); + Path logDir = + new Path(this.rootdir, HLog.getHLogDirectoryName(serverName)); + try { + HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, conf); + } catch (IOException e) { + LOG.error("Failed splitting " + logDir.toString(), e); + } finally { + this.splitLogLock.unlock(); + } + } else { + LOG.info("Log folder belongs to an existing region server"); + } + } + } + + /* + * Get the rootdir. Make sure its wholesome and exists before returning. 
+ * @param rd + * @param conf + * @param fs + * @return hbase.rootdir (after checks for existence and bootstrapping if + * needed populating the directory with necessary bootup files). + * @throws IOException + */ + private static Path checkRootDir(final Path rd, final Configuration c, + final FileSystem fs) + throws IOException { + // If FS is in safe mode wait till out of it. + FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, + 10 * 1000)); + // Filesystem is good. Go ahead and check for hbase.rootdir. + if (!fs.exists(rd)) { + fs.mkdirs(rd); + FSUtils.setVersion(fs, rd); + } else { + FSUtils.checkVersion(fs, rd, true); + } + // Make sure the root region directory exists! + if (!FSUtils.rootRegionExists(fs, rd)) { + bootstrap(rd, c); + } + return rd; + } + + private static void bootstrap(final Path rd, final Configuration c) + throws IOException { + LOG.info("BOOTSTRAP: creating ROOT and first META regions"); + try { + // Bootstrapping, make sure blockcache is off. Else, one will be + // created here in bootstap and it'll need to be cleaned up. Better to + // not make it in first place. Turn off block caching for bootstrap. + // Enable after. + HRegionInfo rootHRI = new HRegionInfo(HRegionInfo.ROOT_REGIONINFO); + setInfoFamilyCaching(rootHRI, false); + HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO); + setInfoFamilyCaching(metaHRI, false); + HRegion root = HRegion.createHRegion(rootHRI, rd, c); + HRegion meta = HRegion.createHRegion(metaHRI, rd, c); + setInfoFamilyCaching(rootHRI, true); + setInfoFamilyCaching(metaHRI, true); + // Add first region from the META table to the ROOT region. + HRegion.addRegionToMETA(root, meta); + root.close(); + root.getLog().closeAndDelete(); + meta.close(); + meta.getLog().closeAndDelete(); + } catch (IOException e) { + e = RemoteExceptionHandler.checkIOException(e); + LOG.error("bootstrap", e); + throw e; + } + } + + /* + * @param hri Set all family block caching to b + * @param b + */ + private static void setInfoFamilyCaching(final HRegionInfo hri, final boolean b) { + for (HColumnDescriptor hcd: hri.getTableDesc().families.values()) { + if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) { + hcd.setBlockCacheEnabled(b); + hcd.setInMemory(b); + } + } + } +} Index: src/main/java/org/apache/hadoop/hbase/master/DeleteColumn.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/DeleteColumn.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/DeleteColumn.java (working copy) @@ -45,8 +45,8 @@ updateRegionInfo(server, m.getRegionName(), i); // Delete the directories used by the column Path tabledir = - new Path(this.master.getRootDir(), i.getTableDesc().getNameAsString()); - this.master.getFileSystem(). + new Path(this.master.getFileSystemManager().getRootDir(), i.getTableDesc().getNameAsString()); + this.master.getFileSystemManager().getFileSystem(). 
delete(Store.getStoreHomedir(tabledir, i.getEncodedName(), this.columnName), true); } Index: src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java (working copy) @@ -183,7 +183,7 @@ if (values == null || values.size() == 0) { break; } - HRegionInfo info = masterStatus.getHRegionInfo(values.getRow(), values); + HRegionInfo info = RegionManager.getHRegionInfo(values.getRow(), values); if (info == null) { emptyRows.add(values.getRow()); continue; @@ -229,7 +229,7 @@ if (emptyRows.size() > 0) { LOG.warn("Found " + emptyRows.size() + " rows with empty HRegionInfo " + "while scanning meta region " + Bytes.toString(region.getRegionName())); - this.masterStatus.deleteEmptyMetaRows(regionServer, region.getRegionName(), + RegionManager.deleteEmptyMetaRows(regionServer, region.getRegionName(), emptyRows); } @@ -309,8 +309,8 @@ LOG.info("Deleting region " + parent.getRegionNameAsString() + " (encoded=" + parent.getEncodedName() + ") because daughter splits no longer hold references"); - HRegion.deleteRegion(this.masterStatus.getFileSystem(), - this.masterStatus.getRootDir(), parent); + HRegion.deleteRegion(masterStatus.getFileSystemManager().getFileSystem(), + this.masterStatus.getFileSystemManager().getRootDir(), parent); HRegion.removeRegionFromMETA(srvr, metaRegionName, parent.getRegionName()); result = true; @@ -499,15 +499,15 @@ return result; } Path tabledir = - new Path(this.masterStatus.getRootDir(), split.getTableDesc().getNameAsString()); + new Path(this.masterStatus.getFileSystemManager().getRootDir(), split.getTableDesc().getNameAsString()); for (HColumnDescriptor family: split.getTableDesc().getFamilies()) { Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(), family.getName()); - if (!this.masterStatus.getFileSystem().exists(p)) continue; + if (!masterStatus.getFileSystemManager().getFileSystem().exists(p)) continue; // Look for reference files. Call listStatus with an anonymous // instance of PathFilter. FileStatus [] ps = - this.masterStatus.getFileSystem().listStatus(p, new PathFilter () { + masterStatus.getFileSystemManager().getFileSystem().listStatus(p, new PathFilter () { public boolean accept(Path path) { return StoreFile.isReference(path); } Index: src/main/java/org/apache/hadoop/hbase/util/FSUtils.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (revision 958800) +++ src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (working copy) @@ -394,7 +394,7 @@ public static Map getTableFragmentation( final HMaster master) throws IOException { - Path path = master.getRootDir(); + Path path = master.getFileSystemManager().getRootDir(); // since HMaster.getFileSystem() is package private FileSystem fs = path.getFileSystem(master.getConfiguration()); return getTableFragmentation(fs, path);
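
Taken together, the patch turns HMaster into a thin composition of managers: FileSystemManager owns the root directory, old-log archiving, and the split-log lock; ServerManager receives its metrics and operation queue explicitly; and ZooKeeperWrapper instances are keyed by the master's address rather than HMaster.class.getName(). A condensed wiring sketch in the spirit of the patched constructor (the MasterWiringSketch class, the "HMaster" metrics name, and the omission of RPC and watcher setup are assumptions made for illustration):

package org.apache.hadoop.hbase.master;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;

public final class MasterWiringSketch {

  private MasterWiringSketch() {
  }

  /** Rough order of construction, following the patched HMaster constructor. */
  static void wire(HMaster master, Configuration conf) throws IOException {
    // ZooKeeper sessions are keyed by the master's address so that helpers
    // such as ProcessRegionOpen and RegionManager can fetch the same instance.
    ZooKeeperWrapper zkw =
        ZooKeeperWrapper.createInstance(conf, master.getHServerAddress().toString());
    boolean clusterStartup = zkw.scanRSDirectory().size() == 0;

    // Filesystem concerns (root dir checks, bootstrap, old-log archiving)
    // now live behind FileSystemManager.
    FileSystemManager fsm = new FileSystemManager(conf, master);

    // ServerManager gets its collaborators handed in rather than pulling
    // them back out of MasterStatus.
    MasterMetrics metrics = new MasterMetrics("HMaster"); // name is illustrative
    RegionServerOperationQueue queue =
        new RegionServerOperationQueue(conf, master.getClosed());
    ServerManager serverManager = new ServerManager(master, metrics, queue);
  }
}
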