From 4a724117215eefb3c33ccd175bd9537408419121 Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Thu, 4 Oct 2018 17:47:27 -0700 Subject: [PATCH] HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list --- .../hadoop/hbase/master/DeadServer.java | 49 ++++++++++++------- .../hadoop/hbase/master/TestDeadServer.java | 4 +- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index c1b5180cb6..d46ef15368 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; +import com.google.common.base.Preconditions; + import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -54,14 +56,9 @@ public class DeadServer { private final Map deadServers = new HashMap(); /** - * Number of dead servers currently being processed - */ - private int numProcessing = 0; - - /** - * Whether a dead server is being processed currently. + * Set of dead servers currently being processed */ - private boolean processing = false; + private final Set processingServers = new HashSet(); /** * A dead server that comes back alive has a different start code. The new start code should be @@ -99,7 +96,9 @@ public class DeadServer { * * @return true if any RS are being processed as dead */ - public synchronized boolean areDeadServersInProgress() { return processing; } + public synchronized boolean areDeadServersInProgress() { + return !processingServers.isEmpty(); + } public synchronized Set copyServerNames() { Set clone = new HashSet(deadServers.size()); @@ -112,10 +111,10 @@ public class DeadServer { * @param sn the server name */ public synchronized void add(ServerName sn) { - processing = true; if (!deadServers.containsKey(sn)){ deadServers.put(sn, EnvironmentEdgeManager.currentTime()); } + processingServers.add(sn); } /** @@ -123,18 +122,26 @@ public class DeadServer { * @param sn ServerName for the dead server. */ public synchronized void notifyServer(ServerName sn) { - if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); } - processing = true; - numProcessing++; + Preconditions.checkState(processingServers.contains(sn), + "processingServers set does not contain " + sn + + " (numProcessing=" + processingServers.size() + ")"); + if (LOG.isDebugEnabled()) { + LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size()); + } } + /** + * Complete processing for this dead server. + * @param sn ServerName for the dead server. + */ public synchronized void finish(ServerName sn) { - numProcessing--; - if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing); - - assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative"; - - if (numProcessing == 0) { processing = false; } + Preconditions.checkState(processingServers.contains(sn), + "processingServers set does not contain " + sn + + " (numProcessing=" + processingServers.size() + ")"); + processingServers.remove(sn); + if (LOG.isDebugEnabled()) { + LOG.debug("Finished " + sn + "; numProcessing=" + processingServers.size()); + } } public synchronized int size() { @@ -164,6 +171,9 @@ public class DeadServer { } sb.append(sn.toString()); } + sb.append(" (numProcessing="); + sb.append(processingServers.size()); + sb.append(')'); return sb.toString(); } @@ -210,6 +220,9 @@ public class DeadServer { * @return true if this server was removed */ public synchronized boolean removeDeadServer(final ServerName deadServerName) { + Preconditions.checkState(!processingServers.contains(deadServerName), + "Asked to remove server still in processingServers set " + deadServerName + + " (numProcessing=" + processingServers.size() + ")"); if (deadServers.remove(deadServerName) == null) { return false; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java index f1a01c5301..8876e705be 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java @@ -116,7 +116,6 @@ public class TestDeadServer { DeadServer d = new DeadServer(); - d.add(hostname123); mee.incValue(1); d.add(hostname1234); @@ -157,14 +156,17 @@ public class TestDeadServer { d.add(hostname1234); Assert.assertEquals(2, d.size()); + d.finish(hostname123); d.removeDeadServer(hostname123); Assert.assertEquals(1, d.size()); + d.finish(hostname1234); d.removeDeadServer(hostname1234); Assert.assertTrue(d.isEmpty()); d.add(hostname1234); Assert.assertFalse(d.removeDeadServer(hostname123_2)); Assert.assertEquals(1, d.size()); + d.finish(hostname1234); Assert.assertTrue(d.removeDeadServer(hostname1234)); Assert.assertTrue(d.isEmpty()); } -- 2.19.0