From 15d92afd35ca5bea67b8444c60bcaa45acfdc12a Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Tue, 11 Sep 2018 22:35:10 -0700 Subject: [PATCH] HBASE-21035 Meta Table should be able to online even if all procedures are lost Experiment (A bunch of this patch stolen from Allan Yang's posting on HBASE-21035). The problem is a purged master proc WAL directory: how do we get meta assigned? If all WALs have been removed, then there is no assign for meta. hbase:namespace is similar. If meta is not online or namespace is not online, master will exit, failing to read meta or namespace. If master exits, we can't inject repair procedures. In this patch, we shuffle around the startup slightly so that the current first scan of meta -- the search for table states -- is moved until after AM has done a loadMeta; loadMeta is the first place where we require meta to be online. Before the call to AM#loadMeta, have added an isMeta check which will wait on all SCPs to be done. If all are done and meta is still not online, we will assign it. I have a similar method for namespace. It's wrong that onlining namespace blocks startup. To fix, need to roll namespace into meta so the namespace table goes away. The problem with these methods is that we have not split any outstanding meta WALs. It's complicated. TODO. Splitting namespace WALs is probably not possible. Maybe it is better to just have Master wait until an operator has inserted an assign of meta (as suggested in the issue). But a forced assign of meta is not easy either. Have to search all WAL dirs for meta WALs. Remove these WALs when split? Split in-line? TODO.
--- .../org/apache/hadoop/hbase/MetaTableAccessor.java | 9 ++ .../procedure2/store/wal/ProcedureWALFormat.java | 1 - .../store/wal/ProcedureWALFormatReader.java | 5 +- .../org/apache/hadoop/hbase/master/HMaster.java | 154 ++++++++++++++++++++- .../hadoop/hbase/master/MasterRpcServices.java | 5 +- .../hadoop/hbase/master/MasterWalManager.java | 44 ++++-- .../hadoop/hbase/master/TableNamespaceManager.java | 8 +- .../hbase/master/assignment/AssignmentManager.java | 5 +- .../master/procedure/DisableTableProcedure.java | 2 +- .../master/procedure/EnableTableProcedure.java | 2 +- .../hbase/master/procedure/InitMetaProcedure.java | 2 +- .../hadoop/hbase/regionserver/HRegionServer.java | 12 +- .../apache/hadoop/hbase/TestMetaTableAccessor.java | 16 +++ .../TestMetaInitIfAllProceduresLost.java | 93 +++++++++++++ 14 files changed, 325 insertions(+), 33 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMetaInitIfAllProceduresLost.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java index 60afaca389..0a12a31a08 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java @@ -716,6 +716,14 @@ public class MetaTableAccessor { scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor); } + /** + * A do-nothing check that we can scan meta. Just tries to Scan a row out of hbase:meta. + * @param connection + * @throws IOException + */ + public static void isScanMeta(Connection connection) throws IOException { + scanMeta(connection, null, null, QueryType.REGION, 1, r -> false); + } /** * Performs a scan of META table. @@ -1136,6 +1144,7 @@ public class MetaTableAccessor { /** * Implementations 'visit' a catalog table row. 
*/ + @FunctionalInterface public interface Visitor { /** * Visit the catalog table row. diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormat.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormat.java index da8af84259..ac3a52941e 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormat.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormat.java @@ -83,7 +83,6 @@ public final class ProcedureWALFormat { // Ignore the last log which is current active log. while (logs.hasNext()) { ProcedureWALFile log = logs.next(); - LOG.debug("Loading WAL id={}", log.getLogId()); log.open(); try { reader.read(log); diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormatReader.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormatReader.java index d1deb1816e..4ab70f18e1 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormatReader.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/wal/ProcedureWALFormatReader.java @@ -140,6 +140,7 @@ public class ProcedureWALFormatReader { LOG.info("Rebuilding tracker for " + log); } + long count = 0; FSDataInputStream stream = log.getStream(); try { boolean hasMore = true; @@ -149,6 +150,7 @@ public class ProcedureWALFormatReader { LOG.warn("Nothing left to decode. 
Exiting with missing EOF, log=" + log); break; } + count++; switch (entry.getType()) { case PROCEDURE_WAL_INIT: readInitEntry(entry); @@ -170,8 +172,9 @@ public class ProcedureWALFormatReader { throw new CorruptedWALProcedureStoreException("Invalid entry: " + entry); } } + LOG.info("Read {} entries in {}", count, log); } catch (InvalidProtocolBufferException e) { - LOG.error("While reading procedure from " + log, e); + LOG.error("While reading entry #{} in {}", count, log, e); loader.markCorruptedWAL(log, e); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index f3fb989315..14efc3528b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -85,6 +85,9 @@ import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.client.TableState; @@ -916,10 +919,14 @@ public class HMaster extends HRegionServer implements MasterServices { status.setStatus("Initializing master coprocessors"); this.cpHost = new MasterCoprocessorHost(this, this.conf); + // Checking if meta needs initializing. status.setStatus("Initializing meta table if this is a new deploy"); InitMetaProcedure initMetaProc = null; - if (assignmentManager.getRegionStates().getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO) - .isOffline()) { + // Print out state of hbase:meta on startup. + RegionState rs = this.assignmentManager.getRegionStates(). 
+ getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO); + LOG.info("hbase:meta {}", rs); + if (rs.isOffline()) { Optional> optProc = procedureExecutor.getProcedures().stream() .filter(p -> p instanceof InitMetaProcedure).findAny(); if (optProc.isPresent()) { @@ -946,7 +953,6 @@ public class HMaster extends HRegionServer implements MasterServices { if (initMetaProc != null) { initMetaProc.await(); } - tableStateManager.start(); // Wake up this server to check in sleeper.skipSleepCycle(); @@ -963,7 +969,7 @@ public class HMaster extends HRegionServer implements MasterServices { return; } - //Initialize after meta as it scans meta + // Initialize after meta as it scans meta if (favoredNodesManager != null) { SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment = new SnapshotOfRegionAssignmentFromMeta(getConnection()); @@ -971,9 +977,21 @@ public class HMaster extends HRegionServer implements MasterServices { favoredNodesManager.initialize(snapshotOfRegionAssignment); } - // Fix up assignment manager status + // From here on out, meta needs to be online. No point progressing if not online. Wait. + if (!isMeta()) { + return; + } + status.setStatus("Starting assignment manager"); + // The below cannot make progress w/o hbase:meta being online. + // This is the FIRST attempt at going to hbase:meta. Meta onlining is going on in background + // as procedures run -- in particular SCPs for crashed servers... in particular the one that + // was carrying hbase;meta. It may take a while to come online. The joinCluster uses a + // special Connection down in its guts at the MetaTableAccessor; one that keeps retrying rather + // than give up after a minute as the default does. this.assignmentManager.joinCluster(); + // The below depends on hbase:meta being online. 
+ this.tableStateManager.start(); // set cluster status again after user regions are assigned this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor()); @@ -989,6 +1007,10 @@ public class HMaster extends HRegionServer implements MasterServices { this.catalogJanitorChore = new CatalogJanitor(this); getChoreService().scheduleChore(catalogJanitorChore); + // Here we expect hbase:namespace to be online. See inside initClusterSchemaService. + if (!isNamespace()) { + return; + } status.setStatus("Starting cluster schema service"); initClusterSchemaService(); @@ -1064,6 +1086,128 @@ public class HMaster extends HRegionServer implements MasterServices { } } + /** + * Check hbase:meta is up and ready for reading. For use during Master startup only. + * @return True if meta is UP and startup can progress, otherwise, we failed getting meta UP. + */ + @VisibleForTesting + public boolean isMeta() throws InterruptedException { + RegionInfo ri = RegionInfoBuilder.FIRST_META_REGIONINFO; + while (!isStopped()) { + RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri); + if (rs.isOpened()) { + if (this.getServerManager().isServerOnline(rs.getServerName())) { + if (isScanMeta()) { + LOG.info("Verified {}", rs); + return true; + } + // If this fails, then I should mark meta as OFFLINE. TODO. + } + } + Optional> optProc = isSCPs(); + if (optProc.isPresent()) { + LOG.info("SCPs"); + // TODO: Are SCPs still present but waiting on hbase:meta for deploy? What to do in this + // case? + Threads.sleep(10000); + } else { + // TODO: Need to recover meta WALs. InitMetaProcedure doesn't do this. Could be loss of + // meta edits. + LOG.info("Forcing assign {}; POSSIBLE loss of hbase:meta edits!", ri); + InitMetaProcedure imp = new InitMetaProcedure(); + procedureExecutor.submitProcedure(imp); + imp.await(); + } + } + return false; + } + + /** + * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table + * (TODO: Fix this! 
NS should not hold-up startup). + * If no SCPs and namespace table is not up, then something wrong. For now, just assign regions + * though it may mean table comes online missing edits. + * @return True if namespace table is up/online or that we've added assigns for its regions. + */ + @VisibleForTesting + public boolean isNamespace() throws InterruptedException { + List ris = this.assignmentManager.getRegionStates(). + getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME); + if (ris.isEmpty()) { + // If empty, means we've not assigned the namespace table yet... Just return true so startup + // continues and the namespace table gets created. + return true; + } + // Else there are namespace regions up in meta. Lets ensure they are assigned before we go + // further. + VERIFIED: for (RegionInfo ri: ris) { + while (!isStopped()) { + RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri); + if (rs.isOpened()) { + if (this.getServerManager().isServerOnline(rs.getServerName())) { + if (isScanRegion(ri)) { + LOG.info("Verified {}", rs); + break VERIFIED; + } + // If this fails, then I should mark as OFFLINE. TODO. + } + } + Optional> optProc = isSCPs(); + if (optProc.isPresent()) { + LOG.info("SCPs"); + Threads.sleep(10000); + } else { + // Cannot recover the WALs for namespace because namespace is like a user-table and so + // edits are mixed in with other user-space edits in random WALs; not like meta with + // dedicated WAL-for-meta. + LOG.info("Forcing assign {}; POSSIBLE loss of hbase:namespace edits!", ri); + this.procedureExecutor.submitProcedure(this.assignmentManager.createAssignProcedure(ri)); + // Break out... presume this region will come online soon enough. + break VERIFIED; + } + } + } + return true; + } + + /** + * @return True if running SCPs. + */ + private Optional> isSCPs() { + return this.procedureExecutor.getProcedures().stream(). 
+ filter(p -> p instanceof ServerCrashProcedure).findAny(); + } + + /** + * @return True if we can scan meta. + */ + private boolean isScanMeta() { + try { + MetaTableAccessor.isScanMeta(getConnection()); + return true; + } catch (IOException e) { + LOG.warn("Failed meta scan attempt", e); + return false; + } + } + + /** + * @return True if we can scan the passed-in region. + */ + private boolean isScanRegion(RegionInfo ri) { + try (Table t = getConnection().getTable(ri.getTable())) { + Scan s = new Scan(ri.getStartKey()); + s.setLimit(1); + try (ResultScanner rs = t.getScanner(s)) { + rs.next(); + } + return true; + } catch (IOException e) { + e.printStackTrace(); + } + return false; + } + /** * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to * automatically remove quotas for a table when that table is deleted. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 4e41783607..598853f789 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -525,9 +525,8 @@ public class MasterRpcServices extends RSRpcServices RpcController controller, ReportRSFatalErrorRequest request) throws ServiceException { String errorText = request.getErrorMessage(); ServerName sn = ProtobufUtil.toServerName(request.getServer()); - String msg = "Region server " + sn - + " reported a fatal error:\n" + errorText; - LOG.error(msg); + String msg = sn + " reported a fatal error:\n" + errorText; + LOG.warn(msg); master.rsFatals.add(msg); return ReportRSFatalErrorResponse.newBuilder().build(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java index d716a11619..346052db55 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterWalManager.java @@ -144,18 +144,33 @@ public class MasterWalManager { } /** - * @return listing of ServerNames found in the filesystem under the WAL directory - * that COULD BE 'alive'; excludes those that have a '-splitting' suffix as these are already - * being split -- they cannot be 'alive'. + * Get Servernames which are currently splitting; paths have a '-splitting' suffix. + * @return ServerName + * @throws IOException IOException + */ + public Set getSplittingServersFromWALDir() throws IOException { + return getServerNamesFromWALDirPath( + p -> p.getName().endsWith(AbstractFSWALProvider.SPLITTING_EXT)); + } + + /** + * Get Servernames that COULD BE 'alive'; excludes those that have a '-splitting' suffix as these + * are already being split -- they cannot be 'alive'. + * @return ServerName + * @throws IOException IOException */ public Set getLiveServersFromWALDir() throws IOException { - Path walDirPath = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME); - FileStatus[] walDirForLiveServers = FSUtils.listStatus(fs, walDirPath, - p -> !p.getName().endsWith(AbstractFSWALProvider.SPLITTING_EXT)); - if (walDirForLiveServers == null) { - return Collections.emptySet(); - } - return Stream.of(walDirForLiveServers).map(s -> { + return getServerNamesFromWALDirPath( + p -> !p.getName().endsWith(AbstractFSWALProvider.SPLITTING_EXT)); + } + + /** + * @return listing of ServerNames found by parsing WAL directory paths in FS. 
+ * + */ + public Set getServerNamesFromWALDirPath(final PathFilter filter) throws IOException { + FileStatus[] walDirForServerNames = getWALDirPaths(filter); + return Stream.of(walDirForServerNames).map(s -> { ServerName serverName = AbstractFSWALProvider.getServerNameFromWALDirectoryName(s.getPath()); if (serverName == null) { LOG.warn("Log folder {} doesn't look like its name includes a " + @@ -167,6 +182,15 @@ public class MasterWalManager { }).filter(s -> s != null).collect(Collectors.toSet()); } + /** + * @return List of all RegionServer WAL dirs; i.e. this.rootDir/HConstants.HREGION_LOGDIR_NAME. + */ + public FileStatus[] getWALDirPaths(final PathFilter filter) throws IOException { + Path walDirPath = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME); + FileStatus[] walDirForServerNames = FSUtils.listStatus(fs, walDirPath, filter); + return walDirForServerNames == null? new FileStatus[0]: walDirForServerNames; + } + /** * Inspect the log directory to find dead servers which need recovery work * @return A set of ServerNames which aren't running but still have WAL files left in file system diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java index 0b4e35bf19..aefeebe098 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java @@ -383,12 +383,16 @@ public class TableNamespaceManager implements Stoppable { return; } try { - this.zkNamespaceManager.stop(); + if (this.zkNamespaceManager != null) { + this.zkNamespaceManager.stop(); + } } catch (IOException ioe) { LOG.warn("Failed NamespaceManager close", ioe); } try { - this.nsTable.close(); + if (this.nsTable != null) { + this.nsTable.close(); + } } catch (IOException ioe) { LOG.warn("Failed Namespace Table close", ioe); } diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index ce33e5204c..e33ffdf558 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1186,8 +1186,9 @@ public class AssignmentManager implements ServerListener { long startTime = System.nanoTime(); LOG.debug("Joining cluster..."); - // Scan hbase:meta to build list of existing regions, servers, and assignment - // hbase:meta is online when we get to here and TableStateManager has been started. + // Scan hbase:meta to build list of existing regions, servers, and assignment. + // hbase:meta is online now or will be. Inside loadMeta, we keep trying. Can't make progress + // w/o meta. loadMeta(); while (master.getServerManager().countOfRegionServers() < 1) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java index 3a2a9521f4..dd1034ec0a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java @@ -256,7 +256,7 @@ public class DisableTableProcedure TableStateManager tsm = env.getMasterServices().getTableStateManager(); TableState ts = tsm.getTableState(tableName); if (!ts.isEnabled()) { - LOG.info("Not ENABLED skipping {}", this); + LOG.info("Not ENABLED, state={}, skipping disable; {}", ts.getState(), this); setFailure("master-disable-table", new TableNotEnabledException(ts.toString())); canTableBeDisabled = false; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java index c46070cd58..a1f56c2333 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java @@ -335,7 +335,7 @@ public class EnableTableProcedure TableStateManager tsm = env.getMasterServices().getTableStateManager(); TableState ts = tsm.getTableState(tableName); if(!ts.isDisabled()){ - LOG.info("Not DISABLED tableState=" + ts + "; skipping enable"); + LOG.info("Not DISABLED tableState={}; skipping enable; {}", ts.getState(), this); setFailure("master-enable-table", new TableNotDisabledException(ts.toString())); canTableBeEnabled = false; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java index d9846326d2..ec30c8abf5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java @@ -117,4 +117,4 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure + UTIL.getMiniHBaseCluster().getMaster().getMasterWalManager() + .getLiveServersFromWALDir().size() == 0); + Thread.sleep(1000); + Path procedureWals = new Path( + UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem() + .getRootDir(), MASTER_PROCEDURE_LOGDIR); + //Kill the master + UTIL.getMiniHBaseCluster().killAll(); + //Delte all procedure log to create an anomaly + for (FileStatus file : UTIL.getTestFileSystem().listStatus(procedureWals)) { + LOG.info("removing " + file); + UTIL.getTestFileSystem().delete(file.getPath()); + } + UTIL.getMiniHBaseCluster().startMaster(); + UTIL.getMiniHBaseCluster().startRegionServer(); + UTIL.getMiniHBaseCluster().startRegionServer(); + 
UTIL.getMiniHBaseCluster().startRegionServer(); + //Master should able to finish init even if all procedures are lost + UTIL.waitFor(30000, () -> UTIL.getMiniHBaseCluster().getMaster() != null && UTIL + .getMiniHBaseCluster().getMaster().isInitialized()); + } +} -- 2.16.3