diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java index e43a712..a2f2a38 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java @@ -1074,7 +1074,8 @@ class ConnectionImplementation implements ClusterConnection, Closeable { long result; ServerErrors errorStats = errorsByServer.get(server); if (errorStats != null) { - result = ConnectionUtils.getPauseTime(basePause, errorStats.getCount()); + // errorStats.getCount() is 1 or more, decrement to start from 0. + result = ConnectionUtils.getPauseTime(basePause, errorStats.getCount() - 1); } else { result = 0; // yes, if the server is not in our list we don't wait before retrying. } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java index 82c2fc4..363a0e0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java @@ -55,6 +55,9 @@ public final class ConnectionUtils { if (ntries >= HConstants.RETRY_BACKOFF.length) { ntries = HConstants.RETRY_BACKOFF.length - 1; } + if (ntries < 0) { + ntries = 0; + } long normalPause = pause * HConstants.RETRY_BACKOFF[ntries]; long jitter = (long)(normalPause * RANDOM.nextFloat() * 0.01f); // 1% possible jitter diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java index 8ff8b8b..725bec0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java @@ -140,7 +140,7 @@ public abstract class RegionAdminServiceCallable implements RetryingCallable< @Override public long sleep(long pause, int tries) { - long sleep = ConnectionUtils.getPauseTime(pause, tries + 1); + long sleep = ConnectionUtils.getPauseTime(pause, tries); if (sleep < MIN_WAIT_DEAD_SERVER && (location == null || connection.isDeadServer(location.getServerName()))) { sleep = ConnectionUtils.addJitter(MIN_WAIT_DEAD_SERVER, 0.10f); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionServerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionServerCallable.java index 6bd8f75..41ab57a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionServerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionServerCallable.java @@ -127,8 +127,7 @@ public abstract class RegionServerCallable implements RetryingCallable { @Override public long sleep(long pause, int tries) { - // Tries hasn't been bumped up yet so we use "tries + 1" to get right pause time - long sleep = ConnectionUtils.getPauseTime(pause, tries + 1); + long sleep = ConnectionUtils.getPauseTime(pause, tries); if (sleep < MIN_WAIT_DEAD_SERVER && (location == null || getConnection().isDeadServer(location.getServerName()))) { sleep = ConnectionUtils.addJitter(MIN_WAIT_DEAD_SERVER, 0.10f); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java index 6ce4956..c9c968e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java @@ -121,8 +121,8 @@ public class RpcRetryingCallerImpl implements RpcRetryingCaller { } // If the server is dead, we need to wait a little before retrying, to give // a chance to the regions to be - // tries hasn't been bumped up yet so we use "tries + 1" to get right pause time - expectedSleep = callable.sleep(pause, tries + 1); + // get right pause time, start by RETRY_BACKOFF[0] * pause + expectedSleep = callable.sleep(pause, tries); // If, after the planned sleep, there won't be enough time left, we stop now. long duration = singleCallDuration(expectedSleep); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java index ac0a0bd..aea8a2b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hbase.client; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.junit.Test; @@ -54,4 +55,23 @@ public class TestConnectionUtils { assertTrue(retyTimeSet.size() > (retries.length * 0.80)); } + @Test + public void testGetPauseTime() { + long pauseTime; + long baseTime = 100; + pauseTime = ConnectionUtils.getPauseTime(baseTime, -1); + assertTrue(pauseTime >= (baseTime * HConstants.RETRY_BACKOFF[0])); + assertTrue(pauseTime <= (baseTime * HConstants.RETRY_BACKOFF[0] * 1.01f)); + + for (int i = 0; i < HConstants.RETRY_BACKOFF.length; i++) { + pauseTime = ConnectionUtils.getPauseTime(baseTime, i); + assertTrue(pauseTime >= (baseTime * HConstants.RETRY_BACKOFF[i])); + assertTrue(pauseTime <= (baseTime * HConstants.RETRY_BACKOFF[i] * 1.01f)); + } + + int length = HConstants.RETRY_BACKOFF.length; + pauseTime = ConnectionUtils.getPauseTime(baseTime, length); + assertTrue(pauseTime >= (baseTime * HConstants.RETRY_BACKOFF[length - 1])); + assertTrue(pauseTime <= (baseTime * HConstants.RETRY_BACKOFF[length - 1] * 1.01f)); + } }