Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1360540) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -140,9 +140,12 @@ // store all the table names in disabling state Set disablingTables = new HashSet(1); - // store all the enabling state tablenames. - Set enablingTables = new HashSet(1); + // store all the enabling state table names and corresponding online servers' regions. + // This may be needed to avoid calling assign twice for the regions of the ENABLING table + // that could have been assigned through processRIT. + Map> enablingTables = new HashMap>(1); + /** * Server to regions assignment map. * Contains the set of regions currently assigned to a given server. @@ -259,6 +262,15 @@ return regions.get(hri); } } + + /** + * Gives enabling table regions. + * @param tableName + * @return list of regionInfos + */ + public List getEnablingTableRegions(String tableName) { + return this.enablingTables.get(tableName); + } /** * Add a regionPlan for the specified region. @@ -272,6 +284,23 @@ } /** + * Add a regionPlan for the specified region if not present. + * + * @param encodedName + * @param plan + * @return return true if plan adds else false. + */ + public boolean addPlanIfNotPresent(HRegionInfo hri, RegionPlan plan) { + synchronized (regionPlans) { + if (regionPlans.get(hri.getEncodedName()) == null) { + regionPlans.put(hri.getEncodedName(), plan); + return true; + } + return false; + } + } + + /** * Add a map of region plans. */ public void addPlans(Map plans) { @@ -351,9 +380,16 @@ // Recover the tables that were not fully moved to DISABLED state. // These tables are in DISABLING state when the master restarted/switched. boolean isWatcherCreated = recoverTableInDisablingState(this.disablingTables); - recoverTableInEnablingState(this.enablingTables, isWatcherCreated); + recoverTableInEnablingState(this.enablingTables.keySet(), isWatcherCreated); + // clear the partially enabled/disabled tables map. We are done with it. + clearEnablingAndDisablingTables(); } + private void clearEnablingAndDisablingTables() { + this.enablingTables.clear(); + this.disablingTables.clear(); + } + /** * Process all regions that are in transition up in zookeeper. Used by * master joining an already running cluster. @@ -419,6 +455,17 @@ this.failover = true; } + // In a case where all the regions of the table are assigned, by the time master restarted + // before changing the state of the table to ENABLED we think it to be a cluster startup. This + // will lead to bulk assignment and as already the regions are assigned may cause region + // assignment inconsistency. + // Even in the case where few regions of the ENABLING table are yet to be assigned going + // through failover is ideal so that the znodes that are in transition can be picked for + // assignment. + if(this.failover == false && this.enablingTables.size() > 0){ + this.failover = true; + } + // If we found user regions out on cluster, its a failover. if (this.failover) { LOG.info("Found regions out on cluster or in RIT; failover"); @@ -496,6 +543,13 @@ String encodedRegionName = regionInfo.getEncodedName(); LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + data.getEventType()); + // remove the region from enabling table regions then EnableTableHandler dont consider the + // region for assignment. See HBASE-6317. + List hris = this.enablingTables.get(regionInfo.getTableNameAsString()); + if(hris != null && !hris.isEmpty()){ + hris.remove(regionInfo); + } + synchronized (regionsInTransition) { RegionState regionState = regionsInTransition.get(encodedRegionName); if (regionState != null || @@ -1665,10 +1719,27 @@ if (t instanceof RemoteException) { t = ((RemoteException) t).unwrapRemoteException(); if (t instanceof RegionAlreadyInTransitionException) { - String errorMsg = "Failed assignment in: " + plan.getDestination() - + " due to " + t.getMessage(); - LOG.error(errorMsg, t); - return; + // If we are coming through master restart there are two places where we call single assign + // -> ProcessRIT if the node is found in OFFLINE, CLOSED and FAILED_OPEN + // In CLOSED state and FAILED_OPEN any way only a new RS is picked up and not the old RS + // on which the Region was previously opening. + // Only in OFFLINE state we can get this exception, that's why we retry it otherwise the + // RS will also stop the assignment saying znode got hijacked. + // -> EnableTableHandler.process() is the other place. + // Here anyway any znode in ProcessRIT we remove it from the this.enablingTables. So no + // two assignments can happen for the same region through this. + if (((HMaster) this.master).isInitialized()) { + String errorMsg = "Failed assignment in: " + plan.getDestination() + " due to " + + t.getMessage(); + LOG.error(errorMsg, t); + return; + } else { + if (LOG.isDebugEnabled()) { + String msg = "Failed assignment in: " + plan.getDestination() + " due to " + + t.getMessage() + " during master initialization. retrying."; + LOG.debug(msg, t); + } + } } } LOG.warn("Failed assignment of " + @@ -2541,8 +2612,8 @@ } // Region is being served and on an active server // add only if region not in disabled and enabling table - if (false == checkIfRegionBelongsToDisabled(regionInfo) - && false == checkIfRegionsBelongsToEnabling(regionInfo)) { + boolean isEnablingTable = checkIfRegionsBelongsToEnabling(regionInfo); + if (!checkIfRegionBelongsToDisabled(regionInfo) && !isEnablingTable) { synchronized (this.regions) { regions.put(regionInfo, regionLocation); addToServers(regionLocation, regionInfo); @@ -2555,6 +2626,16 @@ // this will be used in rolling restarts enableTableIfNotDisabledOrDisablingOrEnabling(disabled, disablingOrEnabling, tableName); + if (isEnablingTable) { + List hris = this.enablingTables.get(tableName); + if (!hris.contains(regionInfo)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Adding region" + regionInfo.getRegionNameAsString() + + " to enabling table " + tableName + "."); + } + hris.add(regionInfo); + } + } } } return offlineServers; @@ -2569,15 +2650,16 @@ } private Boolean addTheTablesInPartialState(Set disablingTables, - Set enablingTables, HRegionInfo regionInfo, - String disablingTableName) { + Map> enablingTables, HRegionInfo regionInfo, String tableName) { if (checkIfRegionBelongsToDisabling(regionInfo)) { - disablingTables.add(disablingTableName); + disablingTables.add(tableName); return true; } else if (checkIfRegionsBelongsToEnabling(regionInfo)) { - enablingTables.add(disablingTableName); + if (!enablingTables.containsKey(tableName)) { + enablingTables.put(tableName, new ArrayList()); + } return true; - } + } return false; } Index: src/main/java/org/apache/hadoop/hbase/master/handler/EnableTableHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/EnableTableHandler.java (revision 1360540) +++ src/main/java/org/apache/hadoop/hbase/master/handler/EnableTableHandler.java (working copy) @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.handler; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.ExecutorService; @@ -27,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.catalog.CatalogTracker; @@ -34,7 +36,9 @@ import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.BulkAssigner; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.apache.zookeeper.KeeperException; /** @@ -46,6 +50,7 @@ private final String tableNameStr; private final AssignmentManager assignmentManager; private final CatalogTracker ct; + private boolean masterRestart = false; public EnableTableHandler(Server server, byte [] tableName, CatalogTracker catalogTracker, AssignmentManager assignmentManager, @@ -56,6 +61,7 @@ this.tableNameStr = Bytes.toString(tableName); this.ct = catalogTracker; this.assignmentManager = assignmentManager; + this.masterRestart = skipTableStateCheck; // Check if table exists if (!MetaReader.tableExists(catalogTracker, this.tableNameStr)) { throw new TableNotFoundException(Bytes.toString(tableName)); @@ -99,10 +105,13 @@ LOG.error("Error trying to enable the table " + this.tableNameStr, e); } catch (KeeperException e) { LOG.error("Error trying to enable the table " + this.tableNameStr, e); + } catch (InterruptedException e) { + LOG.error("Error trying to enable the table " + this.tableNameStr, e); } + } - private void handleEnableTable() throws IOException, KeeperException { + private void handleEnableTable() throws IOException, KeeperException, InterruptedException { // I could check table is disabling and if so, not enable but require // that user first finish disabling but that might be obnoxious. @@ -111,18 +120,45 @@ boolean done = false; // Get the regions of this table. We're done when all listed // tables are onlined. - List regionsInMeta; - regionsInMeta = MetaReader.getTableRegions(this.ct, tableName, true); - int countOfRegionsInTable = regionsInMeta.size(); - List regions = regionsToAssign(regionsInMeta); - int regionsCount = regions.size(); + List> tableRegionsAndLocations = MetaReader + .getTableRegionsAndLocations(this.ct, tableName, true); + + int countOfRegionsInTable = tableRegionsAndLocations.size(); + List> regionsWithServerName = + regionsToAssignWithServerName(tableRegionsAndLocations); + + List regions = new ArrayList(1); + if (masterRestart) { + List enablingTableRegions = assignmentManager + .getEnablingTableRegions(this.tableNameStr); + if (!enablingTableRegions.isEmpty()) { + // Forcefully add the plan to the RS on which the region is already opened. + // In rebuildUserRegions the this.regions is not populated with the regions of the ENABLING + // table. So in this case if we call assign then the region may get assigned to random RS + // thus leading to double assignment. Hence make the assignment to go to an already + // available RS. + regions = addToRegionPlans(regionsWithServerName, enablingTableRegions); + if (LOG.isDebugEnabled()) { + LOG.debug("Assigning " + regions.size() + " regions from " + enablingTableRegions.size() + + " regions of enabling table " + this.tableNameStr + "."); + } + enablingTableRegions.clear(); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("No regions to assign for the table " + this.tableNameStr + + " during master restart."); + } + } + } else { + regions = addToRegionPlans(regionsWithServerName, null); + } + int regionsCount = (regions == null) ? 0 : regions.size(); if (regionsCount == 0) { done = true; } LOG.info("Table has " + countOfRegionsInTable + " regions of which " + regionsCount + " are offline."); - BulkEnabler bd = new BulkEnabler(this.server, regions, - countOfRegionsInTable); + BulkEnabler bd = new BulkEnabler(this.server, regions, countOfRegionsInTable, masterRestart); try { if (bd.bulkAssign()) { done = true; @@ -137,20 +173,52 @@ this.tableNameStr); LOG.info("Enabled table is done=" + done); } + + private List addToRegionPlans(List> regionPairs, + List enablingTableRegions) { + List regions = new ArrayList(regionPairs.size()); + for (Pair region : regionPairs) { + HRegionInfo hri = region.getFirst(); + if (masterRestart) { + if (enablingTableRegions.contains(hri)) { + boolean assign = this.assignmentManager.addPlanIfNotPresent(hri, + new RegionPlan(region.getFirst(), null, region.getSecond())); + if (assign) { + regions.add(hri); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Skipping assign for the region " + hri.getRegionNameAsString() + + " during enable table " + hri.getTableNameAsString() + + " because its already in tranition."); + } + } + } + } else { + regions.add(hri); + } + } + return regions; + } /** - * @param regionsInMeta This datastructure is edited by this method. - * @return The regionsInMeta list minus the regions that have - * been onlined; i.e. List of regions that need onlining. + * @param regionsInMeta + * This datastructure is edited by this method. + * @return The regionsInMeta list minus the regions that have been onlined; i.e. List + * of regions that need onlining. * @throws IOException */ - private List regionsToAssign( - final List regionsInMeta) - throws IOException { - final List onlineRegions = - this.assignmentManager.getRegionsOfTable(tableName); - regionsInMeta.removeAll(onlineRegions); - return regionsInMeta; + private List> regionsToAssignWithServerName( + final List> regionsInMeta) throws IOException { + final List onlineRegions = this.assignmentManager.getRegionsOfTable(tableName); + List> regionsInMetaCopy = new ArrayList>( + regionsInMeta.size()); + + for (Pair regionLocation : regionsInMeta) { + if (!onlineRegions.contains(regionLocation.getFirst())) { + regionsInMetaCopy.add(regionLocation); + } + } + return regionsInMetaCopy; } /** @@ -160,12 +228,14 @@ private final List regions; // Count of regions in table at time this assign was launched. private final int countOfRegionsInTable; + private final boolean masterRestart; BulkEnabler(final Server server, final List regions, - final int countOfRegionsInTable) { + final int countOfRegionsInTable, boolean masterRestart) { super(server); this.regions = regions; this.countOfRegionsInTable = countOfRegionsInTable; + this.masterRestart = masterRestart; } @Override @@ -173,7 +243,9 @@ boolean roundRobinAssignment = this.server.getConfiguration().getBoolean( "hbase.master.enabletable.roundrobin", false); - if (!roundRobinAssignment) { + // In case of masterRestart always go with single assign. Going thro + // roundRobinAssignment will use bulkassign which may lead to double assignment. + if (masterRestart || !roundRobinAssignment) { for (HRegionInfo region : regions) { if (assignmentManager.isRegionInTransition(region) != null) { continue; @@ -181,7 +253,12 @@ final HRegionInfo hri = region; pool.execute(new Runnable() { public void run() { - assignmentManager.assign(hri, true); + if (masterRestart) { + // Already plan is populated. + assignmentManager.assign(hri, true, false, false); + } else { + assignmentManager.assign(hri, true); + } } }); } Index: src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (revision 1360540) +++ src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (working copy) @@ -92,7 +92,11 @@ private static final HRegionInfo REGIONINFO = new HRegionInfo(Bytes.toBytes("t"), HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW); - + private static final HRegionInfo REGIONINFO_2 = new HRegionInfo(Bytes.toBytes("t"), + Bytes.toBytes("a"),Bytes.toBytes( "b")); + private static int assignmentCount; + private static boolean enabling = false; + // Mocked objects or; get redone for each test. private Server server; private ServerManager serverManager; @@ -772,11 +776,18 @@ Result r = getMetaTableRowResult(REGIONINFO, SERVERNAME_A); Mockito.when(ri .openScanner((byte[]) Mockito.any(), (Scan) Mockito.any())). thenReturn(System.currentTimeMillis()); - // Return good result 'r' first and then return null to indicate end of scan - Mockito.when(ri.next(Mockito.anyLong(), Mockito.anyInt())).thenReturn(new Result[] { r }); - // If a get, return the above result too for REGIONINFO - Mockito.when(ri.get((byte[]) Mockito.any(), (Get) Mockito.any())). - thenReturn(r); + + if (enabling) { + Mockito.when(ri.next(Mockito.anyLong(), Mockito.anyInt())).thenReturn(new Result[] { r }, + new Result[] { r }, new Result[] { r }, (Result[]) null); + // If a get, return the above result too for REGIONINFO_2 + Mockito.when(ri.get((byte[]) Mockito.any(), (Get) Mockito.any())).thenReturn( + getMetaTableRowResult(REGIONINFO_2, SERVERNAME_A)); + } else { + Mockito.when(ri.next(Mockito.anyLong(), Mockito.anyInt())).thenReturn(new Result[] { r }); + // If a get, return the above result too for REGIONINFO + Mockito.when(ri.get((byte[]) Mockito.any(), (Get) Mockito.any())).thenReturn(r); + } // Get a connection w/ mocked up common methods. HConnection connection = HConnectionTestingUtility. getMockedConnectionAndDecorate(HTU.getConfiguration(), ri, SERVERNAME_B, @@ -885,6 +896,48 @@ } /** + * Test verifies whether all the enabling table regions assigned only once during master startup. + * + * @throws KeeperException + * @throws IOException + * @throws Exception + */ + @Test + public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception { + enabling = true; + this.server.getConfiguration().setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, + DefaultLoadBalancer.class, LoadBalancer.class); + Map serverAndLoad = new HashMap(); + serverAndLoad.put(SERVERNAME_A, null); + Mockito.when(this.serverManager.getOnlineServers()).thenReturn(serverAndLoad); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(false); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true); + + assignmentCount = 0; + AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(server, + this.serverManager); + am.gate.set(false); + try { + // set table in enabling state. + am.getZKTable().setEnablingTable(REGIONINFO.getTableNameAsString()); + ZKAssign.createNodeOffline(this.watcher, REGIONINFO_2, SERVERNAME_B); + + am.joinCluster(); + while (!am.getZKTable().isEnabledTable(REGIONINFO.getTableNameAsString())) { + Thread.sleep(10); + } + assertEquals("Number of assignments should be equal.", 2, assignmentCount); + assertTrue("Table should be enabled.", + am.getZKTable().isEnabledTable(REGIONINFO.getTableNameAsString())); + } finally { + am.getZKTable().setEnabledTable(REGIONINFO.getTableNameAsString()); + am.shutdown(); + ZKAssign.deleteAllNodes(this.watcher); + assignmentCount = 0; + } + } + + /** * Mocked load balancer class used in the testcase to make sure that the testcase waits until * random assignment is called and the gate variable is set to true. */ @@ -953,8 +1006,13 @@ @Override public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan, boolean hijack) { - assignInvoked = true; - super.assign(region, setOfflineInZK, forceNewPlan, hijack); + if (enabling) { + assignmentCount++; + this.regionOnline(region, SERVERNAME_A); + } else { + assignInvoked = true; + super.assign(region, setOfflineInZK, forceNewPlan, hijack); + } } @Override @@ -1016,6 +1074,7 @@ t.start(); while (!t.isAlive()) Threads.sleep(1); } + @org.junit.Rule public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =