diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AbstractRegionServerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AbstractRegionServerCallable.java index ee9a781..b5ad14b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AbstractRegionServerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AbstractRegionServerCallable.java @@ -112,8 +112,7 @@ abstract class AbstractRegionServerCallable implements RetryingCallableBase { @Override public long sleep(long pause, int tries) { - // Tries hasn't been bumped up yet so we use "tries + 1" to get right pause time - long sleep = ConnectionUtils.getPauseTime(pause, tries + 1); + long sleep = ConnectionUtils.getPauseTime(pause, tries); if (sleep < MIN_WAIT_DEAD_SERVER && (location == null || getConnection().isDeadServer(location.getServerName()))) { sleep = ConnectionUtils.addJitter(MIN_WAIT_DEAD_SERVER, 0.10f); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java index 82c2fc4..363a0e0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java @@ -55,6 +55,9 @@ public final class ConnectionUtils { if (ntries >= HConstants.RETRY_BACKOFF.length) { ntries = HConstants.RETRY_BACKOFF.length - 1; } + if (ntries < 0) { + ntries = 0; + } long normalPause = pause * HConstants.RETRY_BACKOFF[ntries]; long jitter = (long)(normalPause * RANDOM.nextFloat() * 0.01f); // 1% possible jitter diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java index 8ff8b8b..725bec0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionAdminServiceCallable.java @@ -140,7 +140,7 @@ public abstract class RegionAdminServiceCallable implements RetryingCallable< @Override public long sleep(long pause, int tries) { - long sleep = ConnectionUtils.getPauseTime(pause, tries + 1); + long sleep = ConnectionUtils.getPauseTime(pause, tries); if (sleep < MIN_WAIT_DEAD_SERVER && (location == null || connection.isDeadServer(location.getServerName()))) { sleep = ConnectionUtils.addJitter(MIN_WAIT_DEAD_SERVER, 0.10f); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java index 41405ec..8f28796 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerImpl.java @@ -124,8 +124,8 @@ public class RpcRetryingCallerImpl implements RpcRetryingCaller { } // If the server is dead, we need to wait a little before retrying, to give // a chance to the regions to be - // tries hasn't been bumped up yet so we use "tries + 1" to get right pause time - expectedSleep = callable.sleep(pause, tries + 1); + // get right pause time, start by RETRY_BACKOFF[0] * pause + expectedSleep = callable.sleep(pause, tries); // If, after the planned sleep, there won't be enough time left, we stop now. long duration = singleCallDuration(expectedSleep); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java index ac0a0bd..aea8a2b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestConnectionUtils.java @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hbase.client; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.junit.Test; @@ -54,4 +55,23 @@ public class TestConnectionUtils { assertTrue(retyTimeSet.size() > (retries.length * 0.80)); } + @Test + public void testGetPauseTime() { + long pauseTime; + long baseTime = 100; + pauseTime = ConnectionUtils.getPauseTime(baseTime, -1); + assertTrue(pauseTime >= (baseTime * HConstants.RETRY_BACKOFF[0])); + assertTrue(pauseTime <= (baseTime * HConstants.RETRY_BACKOFF[0] * 1.01f)); + + for (int i = 0; i < HConstants.RETRY_BACKOFF.length; i++) { + pauseTime = ConnectionUtils.getPauseTime(baseTime, i); + assertTrue(pauseTime >= (baseTime * HConstants.RETRY_BACKOFF[i])); + assertTrue(pauseTime <= (baseTime * HConstants.RETRY_BACKOFF[i] * 1.01f)); + } + + int length = HConstants.RETRY_BACKOFF.length; + pauseTime = ConnectionUtils.getPauseTime(baseTime, length); + assertTrue(pauseTime >= (baseTime * HConstants.RETRY_BACKOFF[length - 1])); + assertTrue(pauseTime <= (baseTime * HConstants.RETRY_BACKOFF[length - 1] * 1.01f)); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java index a0f91f4..878ced6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java @@ -352,6 +352,46 @@ public class TestHCM { } } + /** + * Test starting from 0 index when calculate the backoff time. + */ + @Test + public void testCallableSleep() throws Exception { + HTableDescriptor hdt = TEST_UTIL.createTableDescriptor("HCM-testCallableSleep"); + hdt.addCoprocessor(SleepAndFailFirstTime.class.getName()); + TEST_UTIL.createTable(hdt, new byte[][] { FAM_NAM }).close(); + + Configuration c = new Configuration(TEST_UTIL.getConfiguration()); + c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2); + c.setInt(HConstants.HBASE_CLIENT_PAUSE, 30 * 1000); + c.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 40 * 1000); + + Connection connection = ConnectionFactory.createConnection(c); + Table t = connection.getTable(TableName.valueOf("HCM-testCallableSleep")); + if (t instanceof HTable) { + HTable table = (HTable) t; + table.setOperationTimeout(80 * 1000); + // Check that it works. Because 20 + 30 * RETRY_BACKOFF[0] + 20 < 80 + table.get(new Get(FAM_NAM)); + + // Resetting and retrying. + SleepAndFailFirstTime.ct.set(0); + try { + table.setOperationTimeout(60 * 1000); + // Will fail this time. After sleep, there are not enough time for second retry + // Beacuse 20 + 30 * RETRY_BACKOFF[0] + 20 > 60 + table.get(new Get(FAM_NAM)); + Assert.fail("We expect an exception here"); + } catch (SocketTimeoutException e) { + LOG.info("We received an exception, as expected ", e); + } catch (IOException e) { + Assert.fail("Wrong exception:" + e.getMessage()); + } finally { + table.close(); + connection.close(); + } + } + } private void testConnectionClose(boolean allowsInterrupt) throws Exception { TableName tableName = TableName.valueOf("HCM-testConnectionClose" + allowsInterrupt); @@ -1032,7 +1072,7 @@ public class TestHCM { } } - @Ignore ("Test presumes RETRY_BACKOFF will never change; it has") @Test + @Test public void testErrorBackoffTimeCalculation() throws Exception { // TODO: This test would seem to presume hardcoded RETRY_BACKOFF which it should not. final long ANY_PAUSE = 100; @@ -1052,40 +1092,20 @@ public class TestHCM { // Check some backoff values from HConstants sequence. tracker.reportServerError(location); - assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(location, ANY_PAUSE)); + assertEqualsWithJitter(ANY_PAUSE * HConstants.RETRY_BACKOFF[0], tracker.calculateBackoffTime(location, ANY_PAUSE)); tracker.reportServerError(location); tracker.reportServerError(location); tracker.reportServerError(location); - assertEqualsWithJitter(ANY_PAUSE * 5, tracker.calculateBackoffTime(location, ANY_PAUSE)); + assertEqualsWithJitter(ANY_PAUSE * HConstants.RETRY_BACKOFF[3], tracker.calculateBackoffTime(location, ANY_PAUSE)); // All of this shouldn't affect backoff for different location. assertEquals(0, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE)); tracker.reportServerError(diffLocation); - assertEqualsWithJitter(ANY_PAUSE, tracker.calculateBackoffTime(diffLocation, ANY_PAUSE)); + assertEqualsWithJitter(ANY_PAUSE * HConstants.RETRY_BACKOFF[0], tracker.calculateBackoffTime(diffLocation, ANY_PAUSE)); // Check with different base. - assertEqualsWithJitter(ANY_PAUSE * 10, + assertEqualsWithJitter(ANY_PAUSE * 2 * HConstants.RETRY_BACKOFF[3], tracker.calculateBackoffTime(location, ANY_PAUSE * 2)); - - // See that time from last error is taken into account. Time shift is applied after jitter, - // so pass the original expected backoff as the base for jitter. - long timeShift = (long)(ANY_PAUSE * 0.5); - timeMachine.setValue(timeBase + timeShift); - assertEqualsWithJitter((ANY_PAUSE * 5) - timeShift, - tracker.calculateBackoffTime(location, ANY_PAUSE), ANY_PAUSE * 2); - - // However we should not go into negative. - timeMachine.setValue(timeBase + ANY_PAUSE * 100); - assertEquals(0, tracker.calculateBackoffTime(location, ANY_PAUSE)); - - // We also should not go over the boundary; last retry would be on it. - long timeLeft = (long)(ANY_PAUSE * 0.5); - timeMachine.setValue(timeBase + largeAmountOfTime - timeLeft); - assertTrue(tracker.canTryMore(1)); - tracker.reportServerError(location); - assertEquals(timeLeft, tracker.calculateBackoffTime(location, ANY_PAUSE)); - timeMachine.setValue(timeBase + largeAmountOfTime); - assertFalse(tracker.canTryMore(1)); } finally { EnvironmentEdgeManager.reset(); }