Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1297323) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -46,7 +46,6 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter; -import org.apache.hadoop.hbase.regionserver.wal.OrphanHLogAfterSplitException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSTableDescriptors; @@ -183,127 +182,118 @@ } /** - * Inspect the log directory to recover any log file without - * an active region server. - * @param onlineServers Set of online servers keyed by - * {@link ServerName} + * Inspect the log directory to recover any log file without an active region + * server. 
+ * @param onlineServers Set of online servers keyed by {@link ServerName} + * @throws IOException */ - void splitLogAfterStartup(final Set onlineServers) { - boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors", - HLog.SPLIT_SKIP_ERRORS_DEFAULT); + void splitLogAfterStartup(final Set onlineServers) + throws IOException { Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); - do { - List serverNames = new ArrayList(); - try { - if (!this.fs.exists(logsDirPath)) return; - FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null); + List serverNames = new ArrayList(); + if (!this.fs.exists(logsDirPath)) + return; + FileStatus[] logFolders = FSUtils.listStatus(this.fs, logsDirPath, null); - if (logFolders == null || logFolders.length == 0) { - LOG.debug("No log files to split, proceeding..."); - return; - } - for (FileStatus status : logFolders) { - String sn = status.getPath().getName(); - // truncate splitting suffix if present (for ServerName parsing) - if (sn.endsWith(HLog.SPLITTING_EXT)) { - sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length()); - } - ServerName serverName = ServerName.parseServerName(sn); - if (!onlineServers.contains(serverName)) { - LOG.info("Log folder " + status.getPath() + " doesn't belong " - + "to a known region server, splitting"); - serverNames.add(serverName); - } else { - LOG.info("Log folder " + status.getPath() - + " belongs to an existing region server"); - } - } - splitLog(serverNames); - retrySplitting = false; - } catch (IOException ioe) { - LOG.warn("Failed splitting of " + serverNames, ioe); - if (!checkFileSystem()) { - LOG.warn("Bad Filesystem, exiting"); - Runtime.getRuntime().halt(1); - } - try { - if (retrySplitting) { - Thread.sleep(conf.getInt( - "hbase.hlog.split.failure.retry.interval", 30 * 1000)); - } - } catch (InterruptedException e) { - LOG.warn("Interrupted, aborting since cannot return w/o splitting"); - Thread.currentThread().interrupt(); - 
retrySplitting = false; - Runtime.getRuntime().halt(1); - } + if (logFolders == null || logFolders.length == 0) { + LOG.debug("No log files to split, proceeding..."); + return; + } + for (FileStatus status : logFolders) { + String sn = status.getPath().getName(); + // truncate splitting suffix if present (for ServerName parsing) + if (sn.endsWith(HLog.SPLITTING_EXT)) { + sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length()); } - } while (retrySplitting); + ServerName serverName = ServerName.parseServerName(sn); + if (!onlineServers.contains(serverName)) { + LOG.info("Log folder " + status.getPath() + " doesn't belong " + + "to a known region server, splitting"); + serverNames.add(serverName); + } else { + LOG.info("Log folder " + status.getPath() + + " belongs to an existing region server"); + } + } + splitLog(serverNames); + } - public void splitLog(final ServerName serverName) throws IOException { + public void splitLog(final ServerName serverName) { List serverNames = new ArrayList(); serverNames.add(serverName); splitLog(serverNames); } - public void splitLog(final List serverNames) throws IOException { + public void splitLog(final List serverNames) { long splitTime = 0, splitLogSize = 0; List logDirs = new ArrayList(); - for(ServerName serverName: serverNames){ - Path logDir = new Path(this.rootdir, - HLog.getHLogDirectoryName(serverName.toString())); - Path splitDir = logDir.suffix(HLog.SPLITTING_EXT); - // rename the directory so a rogue RS doesn't create more HLogs - if (fs.exists(logDir)) { - if (!this.fs.rename(logDir, splitDir)) { - throw new IOException("Failed fs.rename for log split: " + logDir); + int retrySplittingNum = conf.getInt("hbase.hlog.split.retry.num", 2); + do{ + try { + for (ServerName serverName : serverNames) { + Path logDir = new Path(this.rootdir, + HLog.getHLogDirectoryName(serverName.toString())); + Path splitDir = logDir.suffix(HLog.SPLITTING_EXT); + // rename the directory so a rogue RS doesn't create more HLogs + if 
(fs.exists(logDir)) { + if (!this.fs.rename(logDir, splitDir)) { + throw new IOException("Failed fs.rename for log split: " + logDir); + } + logDir = splitDir; + LOG.debug("Renamed region directory: " + splitDir); + } else if (!fs.exists(splitDir)) { + LOG.info("Log dir for server " + serverName + " does not exist"); + continue; + } + logDirs.add(splitDir); } - logDir = splitDir; - LOG.debug("Renamed region directory: " + splitDir); - } else if (!fs.exists(splitDir)) { - LOG.info("Log dir for server " + serverName + " does not exist"); - continue; - } - logDirs.add(splitDir); - } - if (logDirs.isEmpty()) { - LOG.info("No logs to split"); - return; - } - - if (distributedLogSplitting) { - splitLogManager.handleDeadWorkers(serverNames); - splitTime = EnvironmentEdgeManager.currentTimeMillis(); - splitLogSize = splitLogManager.splitLogDistributed(logDirs); - splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime; - } else { - for(Path logDir: logDirs){ - // splitLogLock ensures that dead region servers' logs are processed - // one at a time - this.splitLogLock.lock(); - try { - HLogSplitter splitter = HLogSplitter.createLogSplitter( - conf, rootdir, logDir, oldLogDir, this.fs); - try { - // If FS is in safe mode, just wait till out of it. - FSUtils.waitOnSafeMode(conf, conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 1000)); - splitter.splitLog(); - } catch (OrphanHLogAfterSplitException e) { - LOG.warn("Retrying splitting because of:", e); - //An HLogSplitter instance can only be used once. Get new instance. 
- splitter = HLogSplitter.createLogSplitter(conf, rootdir, logDir, - oldLogDir, this.fs); - splitter.splitLog(); + if (logDirs.isEmpty()) { + LOG.info("No logs to split"); + return; + } + if (distributedLogSplitting) { + splitLogManager.handleDeadWorkers(serverNames); + splitTime = EnvironmentEdgeManager.currentTimeMillis(); + splitLogSize = splitLogManager.splitLogDistributed(logDirs); + splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime; + } else { + for (Path logDir : logDirs) { + // splitLogLock ensures that dead region servers' logs are processed + // one at a time + this.splitLogLock.lock(); + try { + HLogSplitter splitter = HLogSplitter.createLogSplitter(conf, + rootdir, logDir, oldLogDir, this.fs); + // If FS is in safe mode, just wait till out of it. + FSUtils.waitOnSafeMode(conf, + conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 1000)); + splitter.splitLog(); + splitTime = splitter.getTime(); + splitLogSize = splitter.getSize(); + } finally { + this.splitLogLock.unlock(); + } + } } - splitTime = splitter.getTime(); - splitLogSize = splitter.getSize(); - } finally { - this.splitLogLock.unlock(); - } + retrySplittingNum = 0; + } catch (IOException e) { + LOG.warn("Failed splitting log of " + serverNames, e); + if (!checkFileSystem()) { + LOG.warn("Bad Filesystem, exiting"); + retrySplittingNum = 0; + Runtime.getRuntime().halt(1); + } else { + if (retrySplittingNum > 0) { + LOG.info("Retry splitting log, remaining times = " + + retrySplittingNum); + } else { + master.abort("Failed splitting log after retry", e); + } + } } - } + } while (retrySplittingNum-- > 0); if (this.metrics != null) { this.metrics.addSplit(splitTime, splitLogSize); Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1297323) +++ 
src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -171,18 +171,11 @@ public void process() throws IOException { final ServerName serverName = this.serverName; try { - try { - if (this.shouldSplitHlog) { - LOG.info("Splitting logs for " + serverName); - this.services.getMasterFileSystem().splitLog(serverName); - } else { - LOG.info("Skipping log splitting for " + serverName); - } - } catch (IOException ioe) { - this.services.getExecutorService().submit(this); - this.deadServers.add(serverName); - throw new IOException("failed log splitting for " + - serverName + ", will retry", ioe); + if (this.shouldSplitHlog) { + LOG.info("Splitting logs for " + serverName); + this.services.getMasterFileSystem().splitLog(serverName); + } else { + LOG.info("Skipping log splitting for " + serverName); } // Assign root and meta if we were carrying them.