From 469e56a7b13a8b6efe5dbf9804a6ffa583e1f1f3 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Wed, 5 Nov 2014 18:31:44 -0800 Subject: [PATCH] HBASE-12432 RpcRetryingCaller should log after fixed number of retries like AsyncProcess --- .../org/apache/hadoop/hbase/client/AsyncProcess.java | 17 +++++++++++++---- .../apache/hadoop/hbase/client/RpcRetryingCaller.java | 12 ++++++++---- .../hadoop/hbase/client/RpcRetryingCallerFactory.java | 5 ++++- .../apache/hadoop/hbase/client/TestAsyncProcess.java | 4 ++-- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncProcess.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncProcess.java index 61a73c3..d206c71 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncProcess.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncProcess.java @@ -92,6 +92,17 @@ import com.google.common.base.Preconditions; */ class AsyncProcess { private static final Log LOG = LogFactory.getLog(AsyncProcess.class); + + /** + * Configure the number of failures after which the client will start logging. A few failures + * is fine: region moved, then is not opened, then is overloaded. We try to have an acceptable + * heuristic for the number of errors we don't log. 9 was chosen because we wait for 1s at + * this stage. + */ + public static final String START_LOG_ERRORS_AFTER_COUNT_KEY = + "hbase.client.start.log.errors.counter"; + public static final int DEFAULT_START_LOG_ERRORS_AFTER_COUNT = 9; + protected static final AtomicLong COUNTER = new AtomicLong(); protected final long id; private final int startLogErrorsCnt; @@ -230,10 +241,8 @@ class AsyncProcess { this.maxConcurrentTasksPerRegion = conf.getInt(HConstants.HBASE_CLIENT_MAX_PERREGION_TASKS, HConstants.DEFAULT_HBASE_CLIENT_MAX_PERREGION_TASKS); - // A few failure is fine: region moved, then is not opened, then is overloaded. We try - // to have an acceptable heuristic for the number of errors we don't log. - // 9 was chosen because we wait for 1s at this stage. - this.startLogErrorsCnt = conf.getInt("hbase.client.start.log.errors.counter", 9); + this.startLogErrorsCnt = + conf.getInt(START_LOG_ERRORS_AFTER_COUNT_KEY, DEFAULT_START_LOG_ERRORS_AFTER_COUNT); if (this.maxTotalConcurrentTasks <= 0) { throw new IllegalArgumentException("maxTotalConcurrentTasks=" + maxTotalConcurrentTasks); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java index 4344201..9552992 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java @@ -60,13 +60,16 @@ public class RpcRetryingCaller { * Start and end times for a single call. */ private final static int MIN_RPC_TIMEOUT = 2000; + /** How many retries are allowed before we start to log */ + private final int startLogErrorsCnt; private final long pause; private final int retries; - public RpcRetryingCaller(long pause, int retries) { + public RpcRetryingCaller(long pause, int retries, int startLogErrorsCnt) { this.pause = pause; this.retries = retries; + this.startLogErrorsCnt = startLogErrorsCnt; } private void beforeCall() { @@ -113,9 +116,10 @@ public class RpcRetryingCaller { callable.prepare(tries != 0); // if called with false, check table status on ZK return callable.call(); } catch (Throwable t) { - if (LOG.isTraceEnabled()) { - LOG.trace("Call exception, tries=" + tries + ", retries=" + retries + ", retryTime=" + - (EnvironmentEdgeManager.currentTimeMillis() - this.globalStartTime) + "ms", t); + if (tries > startLogErrorsCnt) { + LOG.info("Call exception, tries=" + tries + ", retries=" + retries + ", retryTime=" + + (EnvironmentEdgeManager.currentTimeMillis() - this.globalStartTime) + "ms, msg=" + + callable.getExceptionMessageAdditionalDetail()); } // translateException throws exception when should not retry: i.e. when request is bad. t = translateException(t); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerFactory.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerFactory.java index 9b070a5..53d5c58 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerFactory.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerFactory.java @@ -31,6 +31,7 @@ public class RpcRetryingCallerFactory { protected final Configuration conf; private final long pause; private final int retries; + private final int startLogErrorsCnt; public RpcRetryingCallerFactory(Configuration conf) { this.conf = conf; @@ -38,12 +39,14 @@ public class RpcRetryingCallerFactory { HConstants.DEFAULT_HBASE_CLIENT_PAUSE); retries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); + startLogErrorsCnt = conf.getInt(AsyncProcess.START_LOG_ERRORS_AFTER_COUNT_KEY, + AsyncProcess.DEFAULT_START_LOG_ERRORS_AFTER_COUNT); } public RpcRetryingCaller newCaller() { // We store the values in the factory instance. This way, constructing new objects // is cheap as it does not require parsing a complex structure. - return new RpcRetryingCaller(pause, retries); + return new RpcRetryingCaller(pause, retries, startLogErrorsCnt); } public static RpcRetryingCallerFactory instantiate(Configuration configuration) { diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncProcess.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncProcess.java index 74fbb17..91a5a90 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncProcess.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncProcess.java @@ -107,7 +107,7 @@ public class TestAsyncProcess { protected RpcRetryingCaller createCaller(MultiServerCallable callable) { final MultiResponse mr = createMultiResponse(callable.getLocation(), callable.getMulti(), nbMultiResponse, nbActions); - return new RpcRetryingCaller(100, 10) { + return new RpcRetryingCaller(100, 10, 9) { @Override public MultiResponse callWithoutRetries(RetryingCallable callable, int to) throws IOException, RuntimeException { @@ -127,7 +127,7 @@ public class TestAsyncProcess { static class CallerWithFailure extends RpcRetryingCaller{ public CallerWithFailure() { - super(100, 100); + super(100, 100, 9); } @Override -- 1.9.3 (Apple Git-50)