From bbe5f711ac9ff7acf126c137d524ecb9e2a10d3f Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Thu, 4 Oct 2018 17:47:27 -0700 Subject: [PATCH] HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list --- .../hadoop/hbase/master/DeadServer.java | 63 +++++++++++++------ .../hadoop/hbase/master/TestDeadServer.java | 4 +- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index c1b5180cb6..d7bfb49c1a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; +import com.google.common.base.Preconditions; + import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -54,14 +56,9 @@ public class DeadServer { private final Map deadServers = new HashMap(); /** - * Number of dead servers currently being processed - */ - private int numProcessing = 0; - - /** - * Whether a dead server is being processed currently. + * Set of dead servers currently being processed */ - private boolean processing = false; + private final Set processingServers = new HashSet(); /** * A dead server that comes back alive has a different start code. The new start code should be @@ -99,7 +96,9 @@ public class DeadServer { * * @return true if any RS are being processed as dead */ - public synchronized boolean areDeadServersInProgress() { return processing; } + public synchronized boolean areDeadServersInProgress() { + return !processingServers.isEmpty(); + } public synchronized Set copyServerNames() { Set clone = new HashSet(deadServers.size()); @@ -112,10 +111,13 @@ public class DeadServer { * @param sn the server name */ public synchronized void add(ServerName sn) { - processing = true; if (!deadServers.containsKey(sn)){ deadServers.put(sn, EnvironmentEdgeManager.currentTime()); } + processingServers.add(sn); + if (LOG.isDebugEnabled()) { + LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size()); + } } /** @@ -123,18 +125,24 @@ public class DeadServer { * @param sn ServerName for the dead server. */ public synchronized void notifyServer(ServerName sn) { - if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); } - processing = true; - numProcessing++; + processingServers.add(sn); + if (LOG.isDebugEnabled()) { + LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size()); + } } + /** + * Complete processing for this dead server. + * @param sn ServerName for the dead server. + */ public synchronized void finish(ServerName sn) { - numProcessing--; - if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing); - - assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative"; - - if (numProcessing == 0) { processing = false; } + Preconditions.checkState(processingServers.contains(sn), + "processingServers set does not contain " + sn + + " (numProcessing=" + processingServers.size() + ")"); + processingServers.remove(sn); + if (LOG.isDebugEnabled()) { + LOG.debug("Finished " + sn + "; numProcessing=" + processingServers.size()); + } } public synchronized int size() { @@ -150,6 +158,9 @@ public class DeadServer { while (it.hasNext()) { ServerName sn = it.next(); if (ServerName.isSameHostnameAndPort(sn, newServerName)) { + Preconditions.checkState(!processingServers.contains(sn), + "Asked to clean server still in processingServers set " + sn + + " (numProcessing=" + processingServers.size() + ")"); it.remove(); } } @@ -157,13 +168,24 @@ public class DeadServer { @Override public synchronized String toString() { + // Display unified set of servers from both maps + Set servers = new HashSet(); + servers.addAll(deadServers.keySet()); + servers.addAll(processingServers); StringBuilder sb = new StringBuilder(); - for (ServerName sn : deadServers.keySet()) { + for (ServerName sn : servers) { if (sb.length() > 0) { sb.append(", "); } sb.append(sn.toString()); + // Star entries that are being processed + if (processingServers.contains(sn)) { + sb.append("*"); + } } + sb.append(" (numProcessing="); + sb.append(processingServers.size()); + sb.append(')'); return sb.toString(); } @@ -210,6 +232,9 @@ public class DeadServer { * @return true if this server was removed */ public synchronized boolean removeDeadServer(final ServerName deadServerName) { + Preconditions.checkState(!processingServers.contains(deadServerName), + "Asked to remove server still in processingServers set " + deadServerName + + " (numProcessing=" + processingServers.size() + ")"); if (deadServers.remove(deadServerName) == null) { return false; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java index f1a01c5301..8876e705be 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java @@ -116,7 +116,6 @@ public class TestDeadServer { DeadServer d = new DeadServer(); - d.add(hostname123); mee.incValue(1); d.add(hostname1234); @@ -157,14 +156,17 @@ public class TestDeadServer { d.add(hostname1234); Assert.assertEquals(2, d.size()); + d.finish(hostname123); d.removeDeadServer(hostname123); Assert.assertEquals(1, d.size()); + d.finish(hostname1234); d.removeDeadServer(hostname1234); Assert.assertTrue(d.isEmpty()); d.add(hostname1234); Assert.assertFalse(d.removeDeadServer(hostname123_2)); Assert.assertEquals(1, d.size()); + d.finish(hostname1234); Assert.assertTrue(d.removeDeadServer(hostname1234)); Assert.assertTrue(d.isEmpty()); } -- 2.19.0