Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1333909) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -338,7 +338,7 @@ /** * Called on startup. - * Figures whether a fresh cluster start of we are joining extant running cluster. + * Figures whether a fresh cluster start or we are joining existing running cluster. * @param onlineServers onlined servers when master started * @throws IOException * @throws KeeperException @@ -2475,6 +2475,10 @@ throws IOException, KeeperException { // Region assignment from META List results = MetaReader.fullScan(this.catalogTracker); + + // Add any new but slow to checkin region server that joined the cluster + onlineServers.addAll(((HMaster) master).getServerManager().getOnlineServers().keySet()); + // Map of offline servers and their regions to be returned Map>> offlineServers = new TreeMap>>(); Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1333909) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -579,11 +579,12 @@ } this.assignmentManager.startTimeOutMonitor(); + long currentTime = System.currentTimeMillis(); Set onlineServers = new HashSet(serverManager .getOnlineServers().keySet()); // TODO: Should do this in background rather than block master startup status.setStatus("Splitting logs after master startup"); - splitLogAfterStartup(this.fileSystemManager, onlineServers); + splitLogAfterStartup(this.fileSystemManager, onlineServers, currentTime); // Make sure root and meta assigned before proceeding. if (!assignRootAndMeta(status)) return; @@ -635,10 +636,11 @@ * Override to change master's splitLogAfterStartup. Used testing * @param mfs * @param onlineServers + * @param currentTime */ protected void splitLogAfterStartup(final MasterFileSystem mfs, - Set onlineServers) { - mfs.splitLogAfterStartup(onlineServers); + Set onlineServers, long currentTime) { + mfs.splitLogAfterStartup(onlineServers, currentTime); } /** Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1333909) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -185,10 +185,13 @@ /** * Inspect the log directory to recover any log file without * an active region server. + * Currenttime is passed to avoid HBASE-5816. Any RS logs + * checked in after the currentTime should not be splitted. * @param onlineServers Set of online servers keyed by * {@link ServerName} + * @param currentTime */ - void splitLogAfterStartup(final Set onlineServers) { + void splitLogAfterStartup(final Set onlineServers, long currentTime) { boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors", HLog.SPLIT_SKIP_ERRORS_DEFAULT); Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); @@ -210,9 +213,14 @@ } ServerName serverName = ServerName.parseServerName(sn); if (!onlineServers.contains(serverName)) { - LOG.info("Log folder " + status.getPath() + " doesn't belong " - + "to a known region server, splitting"); - serverNames.add(serverName); + if (status.getModificationTime() < currentTime) { + LOG.info("Log folder " + status.getPath() + " doesn't belong " + + "to a known region server, splitting"); + serverNames.add(serverName); + }else{ + LOG.info("Log folder " + status.getPath() + " found for a region server " + + serverName + " that has been slower in checking in"); + } } else { LOG.info("Log folder " + status.getPath() + " belongs to an existing region server"); Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1333909) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -195,7 +195,10 @@ existingServer + " looks stale, new server:" + serverName); expireServer(existingServer); } - throw new PleaseHoldException(message); + if (services.isServerShutdownHandlerEnabled()) { + // master has completed the initialization + throw new PleaseHoldException(message); + } } } Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (revision 1333909) +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (working copy) @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.TreeSet; @@ -38,6 +39,7 @@ import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; +import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; @@ -51,6 +53,7 @@ import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.mockito.internal.util.reflection.Whitebox; @Category(LargeTests.class) public class TestMasterFailover { @@ -135,6 +138,86 @@ * * @throws Exception */ + + /** + * Verifies HBASE-5816. Here before the master splits the HLog if any new RS checks in it should + * not split the new RS HLog. + * + * @throws Exception + */ + + @Test + public void testShouldNotAllowTwosplitLogsOnMasterFailOver() throws Exception { + Configuration conf = HBaseConfiguration.create(); + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); + try { + LOG.info("Starting testShouldNotAllowTwosplitLogsOnMasterFailOver"); + final int NUM_MASTERS = 1; + final int NUM_RS = 1; + + conf.setInt("hbase.master.assignment.timeoutmonitor.period", 5000); + conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 10000); + + // Start the cluster + TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); + + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + + cluster.getRegionServer(0); + + // wait for master. + cluster.waitForActiveAndReadyMaster(); + + HMaster master = cluster.getMaster(); + + // mock the MasterFileSystem in the HMaster. + MockMasterFileSystem mfs = new MockMasterFileSystem(master, master, new MasterMetrics(master + .getServerManager().toString())); + Whitebox.setInternalState(master, "fileSystemManager", mfs); + + // Get the current list of online servers. + long currentTime = System.currentTimeMillis(); + Set prevonlineServerList = new HashSet(master.getServerManager() + .getOnlineServers().keySet()); + + // Start another server. + RegionServerThread newRSThread = cluster.startRegionServer(); + + // call split log with the current list of online servers. + // Split log will split the HLog of the newly started RS also as it is + // not present in the RS list. + mfs.splitLogAfterStartup(prevonlineServerList, currentTime); + + HRegionServer newRS = newRSThread.getRegionServer(); + String hLogDirectoryName = newRS.getWAL().getHLogDirectoryName( + newRS.getServerName().toString()); + + // The HLog file should not be deleted. + // See HBASE-5816. + assertTrue( + "HLog folder for the RS should be present.", + newRS.getFileSystem().exists( + new Path(mfs.getFileSystem().getHomeDirectory() + "/hbase/" + hLogDirectoryName))); + + } finally { + TEST_UTIL.shutdownMiniCluster(); + } + + } + + public class MockMasterFileSystem extends MasterFileSystem { + + public MockMasterFileSystem(Server master, MasterServices services, MasterMetrics metrics) + throws IOException { + super(master, services, metrics); + } + + @Override + void splitLogAfterStartup(Set onlineServers, long currentTime) { + super.splitLogAfterStartup(onlineServers, currentTime); + } + + } @Test (timeout=180000) public void testMasterFailoverWithMockedRIT() throws Exception { Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 1333909) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (working copy) @@ -99,8 +99,8 @@ @Override protected void splitLogAfterStartup(MasterFileSystem mfs, - Set onlineServers) { - super.splitLogAfterStartup(mfs, onlineServers); + Set onlineServers, long currentTime) { + super.splitLogAfterStartup(mfs, onlineServers, currentTime); logSplit = true; // If "TestingMaster.sleep" is set, sleep after log split. if (getConfiguration().getBoolean("TestingMaster.sleep", false)) {