From 07d85cf2e6efcc8f56e3a1fe47d3675eea7d0052 Mon Sep 17 00:00:00 2001 From: Mingliang Liu Date: Thu, 13 Sep 2018 22:24:59 -0700 Subject: [PATCH] HBASE-21164 reportForDuty to spew fewer logs if master is not ready --- .../apache/hadoop/hbase/master/HMaster.java | 3 +- .../hbase/regionserver/HRegionServer.java | 26 ++++-- .../TestRegionServerReportForDuty.java | 89 +++++++++++++++++++ 3 files changed, 108 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index f51496af23..6d7e2003cc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -2913,7 +2913,8 @@ public class HMaster extends HRegionServer implements MasterServices { } } - void checkServiceStarted() throws ServerNotRunningYetException { + @VisibleForTesting + protected void checkServiceStarted() throws ServerNotRunningYetException { if (!serviceStarted) { throw new ServerNotRunningYetException("Server is not running yet"); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 02815c532a..29d91603e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -943,11 +943,17 @@ public class HRegionServer extends HasThread implements // Try and register with the Master; tell it we are here. Break if // server is stopped or the clusterup flag is down or hdfs went wacky. // Once registered successfully, go ahead and start up all Services. + int retryCount = 0; while (keepLooping()) { - RegionServerStartupResponse w = reportForDuty(); + boolean spewLog = (retryCount % 100 == 0); + RegionServerStartupResponse w = reportForDuty(spewLog); if (w == null) { - LOG.warn("reportForDuty failed; sleeping and then retrying."); + if (spewLog) { + LOG.warn("reportForDuty failed; sleeping and then retrying every {} ms.", + sleeper.getPeriod()); + } this.sleeper.sleep(); + retryCount++; } else { handleReportForDutyResponse(w); break; @@ -2582,14 +2588,14 @@ public class HRegionServer extends HasThread implements return !this.stopped && isClusterUp(); } - /* + /** * Let the master know we're here Run initialization using parameters passed * us by the master. - * @return A Map of key/value configurations we got from the Master else - * null if we failed to register. - * @throws IOException + * @param spewLog Log the INFO level activity if true + * @return A Map of key/value configurations from the Master else null if we failed to register. + * @throws IOException if get ServiceException from master */ - private RegionServerStartupResponse reportForDuty() throws IOException { + private RegionServerStartupResponse reportForDuty(boolean spewLog) throws IOException { if (this.masterless) return RegionServerStartupResponse.getDefaultInstance(); ServerName masterServerName = createRegionServerStatusStub(true); if (masterServerName == null) return null; @@ -2600,8 +2606,10 @@ public class HRegionServer extends HasThread implements rpcServices.rpcScanRequestCount.reset(); rpcServices.rpcMultiRequestCount.reset(); rpcServices.rpcMutateRequestCount.reset(); - LOG.info("reportForDuty to master=" + masterServerName + " with port=" - + rpcServices.isa.getPort() + ", startcode=" + this.startcode); + if (spewLog) { + LOG.info("reportForDuty to master={} with port={}, startcode={}", masterServerName, + rpcServices.isa.getPort(), this.startcode); + } long now = EnvironmentEdgeManager.currentTime(); int port = rpcServices.isa.getPort(); RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java index 83632be678..af0794ece6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java @@ -21,6 +21,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.io.StringWriter; + +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -28,11 +31,18 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.LocalHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; + +import org.apache.log4j.Appender; +import org.apache.log4j.Layout; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.WriterAppender; import org.apache.zookeeper.KeeperException; import org.junit.After; import org.junit.Before; @@ -77,6 +87,85 @@ public class TestRegionServerReportForDuty { testUtil.shutdownMiniDFSCluster(); } + /** + * LogCapturer is similar to {@link org.apache.hadoop.test.GenericTestUtils.LogCapturer} + * except that this implementation has a default appender to the root logger. + * Hadoop 2.8+ supports the default appender in the LogCapture it ships and this can be replaced. + * TODO: This class can be removed after we upgrade Hadoop dependency. + */ + static class LogCapturer { + private StringWriter sw = new StringWriter(); + private WriterAppender appender; + private org.apache.log4j.Logger logger; + + LogCapturer(org.apache.log4j.Logger logger) { + this.logger = logger; + Appender defaultAppender = org.apache.log4j.Logger.getRootLogger().getAppender("stdout"); + if (defaultAppender == null) { + defaultAppender = org.apache.log4j.Logger.getRootLogger().getAppender("console"); + } + final Layout layout = (defaultAppender == null) ? new PatternLayout() : + defaultAppender.getLayout(); + this.appender = new WriterAppender(layout, sw); + this.logger.addAppender(this.appender); + } + + String getOutput() { + return sw.toString(); + } + + void stopCapturing() { + this.logger.removeAppender(this.appender); + } + } + + /** + * This test HMaster class will always throw ServerNotRunningYetException if checked. + */ + public static class NeverInitializedMaster extends HMaster { + public NeverInitializedMaster(Configuration conf) throws IOException, KeeperException { + super(conf); + } + + @Override + protected void checkServiceStarted() throws ServerNotRunningYetException { + throw new ServerNotRunningYetException("Server is not running yet"); + } + } + + /** + * Tests region server should spew less log when reporting for duty if master is not ready. + */ + @Test + public void testReportForDutySpewLessLog() throws IOException, InterruptedException { + cluster.getConfiguration().set(HConstants.MASTER_IMPL, NeverInitializedMaster.class.getName()); + master = cluster.addMaster(); + master.start(); + + LogCapturer capturer = new LogCapturer(org.apache.log4j.Logger.getLogger(HRegionServer.class)); + int msginterval = 100; + cluster.getConfiguration().setInt("hbase.regionserver.msginterval", msginterval); + rs = cluster.addRegionServer(); + rs.start(); + + int interval = 20_000; + Thread.sleep(interval); + capturer.stopCapturing(); + String output = capturer.getOutput(); + LOG.info("{}", output); + String failMsg = "reportForDuty failed;"; + int actualLogSpewed = StringUtils.countMatches(output, failMsg); + + // Following asserts the actual logging number is close to every 100 retrying times. + // Ideally we can assert the exact logging spewed. We relax here to tolerate contention error. + int expectedLogSpewed = (interval / msginterval) / 100; + LOG.info("Expected log number: {}, actual log number: {}", expectedLogSpewed, actualLogSpewed); + assertTrue(String.format("reportForDuty spews log %d times, less than expected min %d", + actualLogSpewed, expectedLogSpewed / 2), actualLogSpewed > expectedLogSpewed / 2); + assertTrue(String.format("reportForDuty spews log %d times, more than expected max %d", + actualLogSpewed, expectedLogSpewed * 2), actualLogSpewed < expectedLogSpewed * 2); + } + /** * Tests region sever reportForDuty with backup master becomes primary master after * the first master goes away. -- 2.19.0