diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 2fbfd9f..6ba517c 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -628,6 +628,52 @@ public class MetaTableAccessor {
     return visitor.getResults();
   }
 
+  /**
+   * Scans hbase:meta over the system namespace row range and collects system table regions.
+   * @param connection connection we're using
+   * @param excludeOfflinedSplitParents if true, skip regions that are split parents
+   * @return list of (region, server name) pairs, one per non-null region location
+   */
+  public static List<Pair<HRegionInfo, ServerName>> getSystemTableRegionsAndLocations(
+      Connection connection, final boolean excludeOfflinedSplitParents) throws IOException {
+    // Make a version of CollectingVisitor that collects HRegionInfo and ServerAddress
+    CollectingVisitor<Pair<HRegionInfo, ServerName>> visitor =
+      new CollectingVisitor<Pair<HRegionInfo, ServerName>>() {
+        private RegionLocations current = null;
+
+        @Override
+        public boolean visit(Result r) throws IOException {
+          current = getRegionLocations(r);
+          if (current == null || current.getRegionLocation().getRegionInfo() == null) {
+            LOG.warn("No serialized HRegionInfo in " + r);
+            return true;
+          }
+          HRegionInfo hri = current.getRegionLocation().getRegionInfo();
+          if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
+
+          if (!hri.isSystemTable()) return true;
+          // Else call super and add this Result to the collection.
+          return super.visit(r);
+        }
+
+        @Override
+        void add(Result r) {
+          if (current == null) {
+            return;
+          }
+          for (HRegionLocation loc : current.getRegionLocations()) {
+            if (loc != null) {
+              this.results.add(new Pair<HRegionInfo, ServerName>(
+                loc.getRegionInfo(), loc.getServerName()));
+            }
+          }
+        }
+      };
+    scanMeta(connection, NamespaceDescriptor.SYSTEM_NAMESPACE_START_ROW,
+      NamespaceDescriptor.SYSTEM_NAMESPACE_END_ROW, QueryType.REGION, visitor);
+    return visitor.getResults();
+  }
+
   /**
    * @param connection connection we're using
    * @param serverName server whose regions we're interested in
@@ -1074,6 +1120,7 @@ public class MetaTableAccessor {
     get.setTimeRange(0, time);
     Result result =
         metaHTable.get(get);
+    if (result == null) return null;
     return getTableState(result);
   }
 
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/NamespaceDescriptor.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/NamespaceDescriptor.java
index e1ceace..13b0568 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/NamespaceDescriptor.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/NamespaceDescriptor.java
@@ -45,6 +45,10 @@ public class NamespaceDescriptor {
   public static final byte [] SYSTEM_NAMESPACE_NAME = Bytes.toBytes("hbase");
   public static final String SYSTEM_NAMESPACE_NAME_STR =
       Bytes.toString(SYSTEM_NAMESPACE_NAME);
+
+  public static final byte [] SYSTEM_NAMESPACE_START_ROW = Bytes.toBytes("hbase:");
+  public static final byte [] SYSTEM_NAMESPACE_END_ROW = Bytes.toBytes("hbase;");
+
   /** Default namespace name. */
   public static final byte [] DEFAULT_NAMESPACE_NAME = Bytes.toBytes("default");
   public static final String DEFAULT_NAMESPACE_NAME_STR =
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index f7f98fe..d30e02b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -390,13 +390,14 @@ public class AssignmentManager {
 
   /**
    * Called on startup.
-   * Figures whether a fresh cluster start of we are joining extant running cluster.
+   * Figures whether a fresh cluster start or we are joining extant running cluster.
+   * @param pair Set of dead servers and a boolean, indicating whether this is failover
    * @throws IOException
    * @throws KeeperException
    * @throws InterruptedException
    * @throws CoordinatedStateException
    */
-  void joinCluster()
+  void joinCluster(Pair<Set<ServerName>, Boolean> pair)
   throws IOException, KeeperException, InterruptedException, CoordinatedStateException {
     long startTime = System.currentTimeMillis();
     // Concurrency note: In the below the accesses on regionsInTransition are
@@ -407,34 +408,29 @@ public class AssignmentManager {
     // TODO: Regions that have a null location and are not in regionsInTransitions
     // need to be handled.
 
-    // Scan hbase:meta to build list of existing regions, servers, and assignment
-    // Returns servers who have not checked in (assumed dead) that some regions
-    // were assigned to (according to the meta)
-    Set<ServerName> deadServers = rebuildUserRegions();
-
     // This method will assign all user regions if a clean server startup or
     // it will reconstruct master state and cleanup any leftovers from previous master process.
-    boolean failover = processDeadServersAndRegionsInTransition(deadServers);
+    processDeadServersAndRegionsInTransition(pair.getFirst(), pair.getSecond());
 
     recoverTableInDisablingState();
     recoverTableInEnablingState();
     LOG.info("Joined the cluster in " + (System.currentTimeMillis()
-      - startTime) + "ms, failover=" + failover);
+      - startTime) + "ms, failover=" + pair.getSecond());
   }
 
   /**
-   * Process all regions that are in transition in zookeeper and also
-   * processes the list of dead servers.
-   * Used by master joining an cluster. If we figure this is a clean cluster
-   * startup, will assign all user regions.
-   * @param deadServers Set of servers that are offline probably legitimately that were carrying
-   * regions according to a scan of hbase:meta. Can be null.
+   * Determines whether we're in failover
    * @throws IOException
    * @throws InterruptedException
    */
-  boolean processDeadServersAndRegionsInTransition(final Set<ServerName> deadServers)
-  throws KeeperException, IOException, InterruptedException, CoordinatedStateException {
-    // TODO Needed? List<String> nodes = ZKUtil.listChildrenNoWatch(watcher, watcher.assignmentZNode);
+  Pair<Set<ServerName>, Boolean> isFailover()
+  throws KeeperException, IOException, InterruptedException {
+    Pair<Set<ServerName>, Boolean> ret = new Pair<>();
+    // Scan hbase:meta to build list of existing regions, servers, and assignment
+    // Returns servers who have not checked in (assumed dead) that some regions
+    // were assigned to (according to the meta)
+    ret.setFirst(rebuildUserRegions());
+
     boolean failover = !serverManager.getDeadServers().isEmpty();
     if (failover) {
       // This may not be a failover actually, especially if meta is on this master.
@@ -442,12 +438,12 @@ public class AssignmentManager {
         LOG.debug("Found dead servers out on cluster " + serverManager.getDeadServers());
       }
     } else {
-      // If any one region except meta is assigned, it's a failover.
+      // If any one region except system is assigned, it's a failover.
       Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
       for (Map.Entry<HRegionInfo, ServerName> en:
           regionStates.getRegionAssignments().entrySet()) {
         HRegionInfo hri = en.getKey();
-        if (!hri.isMetaTable()
+        if (!hri.isSystemTable()
             && onlineServers.contains(en.getValue())) {
           LOG.debug("Found " + hri + " out on cluster");
           failover = true;
@@ -455,12 +451,12 @@ public class AssignmentManager {
         }
       }
       if (!failover) {
-        // If any region except meta is in transition on a live server, it's a failover.
+        // If any region except system is in transition on a live server, it's a failover.
         Map<String, RegionState> regionsInTransition = regionStates.getRegionsInTransition();
         if (!regionsInTransition.isEmpty()) {
           for (RegionState regionState: regionsInTransition.values()) {
             ServerName serverName = regionState.getServerName();
-            if (!regionState.getRegion().isMetaRegion()
+            if (!regionState.getRegion().isSystemTable()
                 && serverName != null && onlineServers.contains(serverName)) {
               LOG.debug("Found " + regionState + " in RITs");
               failover = true;
@@ -501,7 +497,24 @@ public class AssignmentManager {
         }
       }
     }
-
+    ret.setSecond(failover);
+    return ret;
+  }
+
+  /**
+   * Process all regions that are in transition in zookeeper and also
+   * processes the list of dead servers.
+   * Used by master joining a cluster. If we figure this is a clean cluster
+   * startup, will assign all user regions.
+   * @param deadServers Set of servers that are offline probably legitimately that were carrying
+   * regions according to a scan of hbase:meta. Can be null.
+   * @param failover whether this is a failover, as determined by {@link #isFailover()}
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  boolean processDeadServersAndRegionsInTransition(final Set<ServerName> deadServers,
+      boolean failover)
+  throws KeeperException, IOException, InterruptedException, CoordinatedStateException {
     Set<TableName> disabledOrDisablingOrEnabling = null;
     Map<HRegionInfo, ServerName> allRegions = null;
 
@@ -643,6 +656,12 @@ public class AssignmentManager {
    * Use care with forceNewPlan. It could cause double assignment.
    */
   public void assign(HRegionInfo region, boolean forceNewPlan) {
+    assign(region, forceNewPlan, false);
+  }
+  /**
+   * Use care with forceNewPlan. It could cause double assignment.
+   */
+  public void assign(HRegionInfo region, boolean forceNewPlan, boolean failover) {
     if (isDisabledorDisablingRegionInRIT(region)) {
       return;
     }
@@ -651,10 +670,10 @@ public class AssignmentManager {
     try {
       RegionState state = forceRegionStateToOffline(region, forceNewPlan);
       if (state != null) {
-        if (regionStates.wasRegionOnDeadServer(encodedName)) {
+        if (failover && regionStates.wasRegionOnDeadServer(encodedName)) {
           LOG.info("Skip assigning " + region.getRegionNameAsString()
-            + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName)
-            + " is dead but not processed yet");
+            + ", its host " + regionStates.getLastRegionServerOfRegion(encodedName)
+            + " is dead but not processed yet - region state: " + state);
           return;
         }
         assign(state, forceNewPlan);
@@ -2234,7 +2253,8 @@ public class AssignmentManager {
     // The region must be opening on this server.
     // If current state is already opened on the same server,
     // it could be a reportRegionTransition RPC retry.
-    if (current == null || !current.isOpeningOrOpenedOnServer(serverName)) {
+    if (current == null ||
+        (!current.getRegion().isSystemTable() && !current.isOpeningOrOpenedOnServer(serverName))) {
       return hri.getShortNameToLog() + " is not opening on " + serverName;
     }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index ca721e2..f4eeaf3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -245,6 +245,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
   /** Namespace stuff */
   private TableNamespaceManager tableNamespaceManager;
 
+  Set<TableName> systemTablesAssignedBeforeInitCompletes = new HashSet<TableName>();
+
   // Metrics for the HMaster
   final MetricsMaster metricsMaster;
   // file system manager for the master FS operations
@@ -599,7 +601,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     this.mpmHost.register(new MasterFlushTableProcedureManager());
     this.mpmHost.loadProcedures(conf);
     this.mpmHost.initialize(this, this.metricsMaster);
+  }
 
+  Set<TableName> getSystemTablesBeingAssigned() {
+    return systemTablesAssignedBeforeInitCompletes;
   }
 
   /**
@@ -730,6 +735,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
 
     // Make sure meta assigned before proceeding.
     status.setStatus("Assigning Meta Region");
+    systemTablesAssignedBeforeInitCompletes.clear();
     assignMeta(status, previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID);
     // check if master is shutting down because above assignMeta could return even hbase:meta isn't
     // assigned when master is shutting down
@@ -751,9 +757,16 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
       this.serverManager.processDeadServer(tmpServer, true);
     }
 
+    Pair<Set<ServerName>, Boolean> pair = this.assignmentManager.isFailover();
+    if (!pair.getSecond()) {
+      assignSystemTable(status);
+    }
+
     // Fix up assignment manager status
     status.setStatus("Starting assignment manager");
-    this.assignmentManager.joinCluster();
+    this.assignmentManager.joinCluster(pair);
+    status.setStatus("Starting namespace manager");
+    initNamespace();
 
     // set cluster status again after user regions are assigned
     this.balancer.setClusterStatus(getClusterStatus());
@@ -769,9 +782,6 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     this.catalogJanitorChore = new CatalogJanitor(this, this);
     getChoreService().scheduleChore(catalogJanitorChore);
 
-    status.setStatus("Starting namespace manager");
-    initNamespace();
-
     if (this.cpHost != null) {
       try {
         this.cpHost.preMasterInitialization();
@@ -873,6 +883,53 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     }
   }
 
+  private void assignSystemTable(MonitoredTask status) throws IOException {
+    List<Pair<HRegionInfo, ServerName>> location =
+        MetaTableAccessor.getSystemTableRegionsAndLocations(getConnection(), true);
+    if (location == null || location.isEmpty()) return;
+
+    RegionStates states = this.assignmentManager.getRegionStates();
+    String prompt;
+    for (int idx = 0; idx < location.size(); idx++) {
+      if (location.get(idx).getFirst().isMetaRegion()) {
+        // hbase:meta has been assigned
+        continue;
+      }
+      RegionState state = states.getRegionState(location.get(idx).getFirst());
+      TableName table = location.get(idx).getFirst().getTable();
+      if (location.get(idx).getSecond() == null) {
+        // there is no recorded server for the table
+        prompt = "Assigning " + table + " to some server - " + state;
+        LOG.debug(prompt);
+        status.setStatus(prompt);
+        this.assignmentManager.assign(location.get(idx).getFirst(), true);
+      } else {
+        ServerName svr = location.get(idx).getSecond();
+        if (!this.serverManager.isServerOnline(svr)) {
+          prompt = "Assigning " + table + " to random server since " + svr + " is not online - " + state;
+          LOG.debug(prompt);
+          status.setStatus(prompt);
+          this.assignmentManager.assign(location.get(idx).getFirst(), true, false);
+          systemTablesAssignedBeforeInitCompletes.add(table);
+          continue;
+        }
+        List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
+        regions.add(location.get(idx).getFirst());
+        prompt = "Assigning " + table + " to " + svr + " - " + state;
+        LOG.debug(prompt);
+        status.setStatus(prompt);
+        try {
+          this.assignmentManager.assign(svr, regions);
+        } catch (InterruptedException ie) {
+          InterruptedIOException iioe = new InterruptedIOException("Received " + ie.getMessage());
+          iioe.initCause(ie);
+          throw iioe;
+        }
+      }
+      systemTablesAssignedBeforeInitCompletes.add(table);
+    }
+  }
+
   /**
    * Check hbase:meta is assigned. If not, assign it.
    * @param status MonitoredTask
@@ -950,6 +1007,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location="
       + metaTableLocator.getMetaRegionLocation(this.getZooKeeper(), replicaId));
     status.setStatus("META assigned.");
+    systemTablesAssignedBeforeInitCompletes.add(TableName.META_TABLE_NAME);
   }
 
   void initNamespace() throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 9ce53e8..df7edb7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -1347,10 +1347,10 @@ public class MasterRpcServices extends RSRpcServices
       TableName tableName = ProtobufUtil.toTableName(
         rt.getRegionInfo(0).getTableName());
       RegionStates regionStates = master.assignmentManager.getRegionStates();
-      if (!(TableName.META_TABLE_NAME.equals(tableName)
-          && regionStates.getRegionState(HRegionInfo.FIRST_META_REGIONINFO) != null)
+      if (!(tableName.isSystemTable()
+          && regionStates.getRegionState(HRegionInfo.convert(rt.getRegionInfo(0))) != null)
           && !master.assignmentManager.isFailoverCleanupDone()) {
-        // Meta region is assigned before master finishes the
+        // system table regions are assigned before master finishes the
         // failover cleanup. So no need this check for it
         throw new PleaseHoldException("Master is rebuilding user regions");
       }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index 5528b07..b10cf3b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -895,7 +895,8 @@ public class RegionStates {
           continue;
         }
         TableName tableName = hri.getTable();
-        if (!TableName.META_TABLE_NAME.equals(tableName)
+        // system tables are assigned ahead of user tables - exclude them
+        if (!tableName.isSystemTable()
             && (noExcludeTables || !excludedTables.contains(tableName))) {
           toBeClosed.add(hri);
         }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
index 02912b9..6a13a53 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java
@@ -134,15 +134,20 @@ public class TableNamespaceManager {
   }
 
   private NamespaceDescriptor get(Table table, String name) throws IOException {
-    Result res = table.get(new Get(Bytes.toBytes(name)));
-    if (res.isEmpty()) {
-      return null;
-    }
-    byte[] val = CellUtil.cloneValue(res.getColumnLatestCell(
+    try {
+      Result res = table.get(new Get(Bytes.toBytes(name)));
+      if (res.isEmpty()) {
+        return null;
+      }
+      byte[] val = CellUtil.cloneValue(res.getColumnLatestCell(
         HTableDescriptor.NAMESPACE_FAMILY_INFO_BYTES, HTableDescriptor.NAMESPACE_COL_DESC_BYTES));
-    return
-        ProtobufUtil.toNamespaceDescriptor(
+      return
+          ProtobufUtil.toNamespaceDescriptor(
             HBaseProtos.NamespaceDescriptor.parseFrom(val));
+    } catch (IOException ioe) {
+      LOG.debug("get nsd: ", ioe);
+      throw ioe;
+    }
   }
 
   private void create(Table table, NamespaceDescriptor ns) throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 315659a..8c63e4a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -1987,7 +1987,9 @@ public class HRegionServer extends HasThread implements
     if (code == TransitionCode.OPENED && openSeqNum >= 0) {
       transition.setOpenSeqNum(openSeqNum);
     }
+    StringBuilder sb = new StringBuilder();
     for (HRegionInfo hri: hris) {
+      sb.append(hri).append(' ');
       transition.addRegionInfo(HRegionInfo.convert(hri));
     }
     ReportRegionStateTransitionRequest request = builder.build();
@@ -2008,7 +2010,7 @@ public class HRegionServer extends HasThread implements
         return true;
       } catch (ServiceException se) {
         IOException ioe = ProtobufUtil.getRemoteException(se);
-        LOG.info("Failed to report region transition, will retry", ioe);
+        LOG.info("Failed to report region transition:" + sb.toString() + ", will retry", ioe);
         if (rssStub == rss) {
           rssStub = null;
         }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index 614f6fb..e860402 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -178,6 +178,8 @@ public class TestMasterFailover {
     // and he should be active
     active = masterThreads.get(0).getMaster();
     assertNotNull(active);
+    assertTrue(active.getSystemTablesBeingAssigned().contains(
+      TableName.NAMESPACE_TABLE_NAME));
     status = active.getClusterStatus();
     ServerName mastername = status.getMaster();
     assertTrue(mastername.equals(active.getServerName()));