From 80347ea3891aeb9614ab92a06eacfa39cc99c1f9 Mon Sep 17 00:00:00 2001 From: Mike Drob Date: Thu, 25 Oct 2018 12:39:19 -0500 Subject: [PATCH] HBASE-21380 Filter finished SCP at start --- .../java/org/apache/hadoop/hbase/master/DeadServer.java | 15 ++++++++++++--- .../main/java/org/apache/hadoop/hbase/master/HMaster.java | 6 ++---- .../apache/hadoop/hbase/master/RegionServerTracker.java | 7 +++++-- .../org/apache/hadoop/hbase/master/ServerManager.java | 5 +++-- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index 4183201c61..de094c2564 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -121,17 +121,26 @@ public class DeadServer { return clone; } + /** * Adds the server to the dead server list if it's not there already. * @param sn the server name */ public synchronized void add(ServerName sn) { + add(sn, true); + } + + /** + * Adds the server to the dead server list if it's not there already. + * @param sn the server name + * @param processing whether there is an active SCP associated with the server + */ + public synchronized void add(ServerName sn, boolean processing) { if (!deadServers.containsKey(sn)){ deadServers.put(sn, EnvironmentEdgeManager.currentTime()); } - boolean added = processingServers.add(sn); - if (LOG.isDebugEnabled() && added) { - LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size()); + if (processing && processingServers.add(sn)) { + LOG.debug("Added {}; numProcessing={}", sn, processingServers.size()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index cd2055286b..93d2b21f4e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -902,10 +902,8 @@ public class HMaster extends HRegionServer implements MasterServices { // TODO: Generate the splitting and live Set in one pass instead of two as we currently do. this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager); this.regionServerTracker.start( - procedureExecutor.getProcedures().stream() - .filter(p -> p instanceof ServerCrashProcedure && !p.isFinished()) - .map(p -> ((ServerCrashProcedure) p).getServerName()) - .collect(Collectors.toSet()), + procedureExecutor.getProcedures().stream().filter(p -> p instanceof ServerCrashProcedure) + .map(p -> ((ServerCrashProcedure) p)).collect(Collectors.toSet()), walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir()); // This manager will be started AFTER hbase:meta is confirmed on line. // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. They read table diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java index f419732eb8..2a3f8b928a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.ServerMetrics; import org.apache.hadoop.hbase.ServerMetricsBuilder; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.VersionInfoUtil; +import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.ZKListener; import org.apache.hadoop.hbase.zookeeper.ZKUtil; @@ -117,7 +118,7 @@ public class RegionServerTracker extends ZKListener { * @param liveServersFromWALDir the live region servers from wal directory. * @param splittingServersFromWALDir Servers whose WALs are being actively 'split'. */ - public void start(Set deadServersFromPE, Set liveServersFromWALDir, + public void start(Set deadServersFromPE, Set liveServersFromWALDir, Set splittingServersFromWALDir) throws KeeperException, IOException { LOG.info("Starting RegionServerTracker; {} have existing ServerCrashProcedures, {} " + @@ -126,7 +127,9 @@ public class RegionServerTracker extends ZKListener { // deadServersFromPE is made from a list of outstanding ServerCrashProcedures. // splittingServersFromWALDir are being actively split -- the directory in the FS ends in // '-SPLITTING'. Each splitting server should have a corresponding SCP. Log if not. - splittingServersFromWALDir.stream().filter(s -> !deadServersFromPE.contains(s)). + Set deadServerNames = deadServersFromPE.stream() + .map(s -> s.getServerName()).collect(Collectors.toSet()); + splittingServersFromWALDir.stream().filter(s -> !deadServerNames.contains(s)). forEach(s -> LOG.error("{} has no matching ServerCrashProcedure", s)); watcher.registerListener(this); synchronized (this) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 5489b1e273..8282339ace 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HBaseRpcController; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; +import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; @@ -316,9 +317,9 @@ public class ServerManager { * @param deadServersFromPE the region servers which already have a SCP associated. * @param liveServersFromWALDir the live region servers from wal directory. */ - void findDeadServersAndProcess(Set deadServersFromPE, + void findDeadServersAndProcess(Set deadServersFromPE, Set liveServersFromWALDir) { - deadServersFromPE.forEach(deadservers::add); + deadServersFromPE.forEach(scp -> deadservers.add(scp.getServerName(), !scp.isFinished())); liveServersFromWALDir.stream().filter(sn -> !onlineServers.containsKey(sn)) .forEach(this::expireServer); } -- 2.16.1