diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java index 278339d..e9bfff9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java @@ -174,7 +174,8 @@ public abstract class ServerCallable implements Callable { prepare(tries != 0); // if called with false, check table status on ZK return call(); } catch (Throwable t) { - LOG.warn("Call exception, tries=" + tries + ", numRetries=" + numRetries, t); + LOG.warn("Call exception, tries=" + tries + ", numRetries=" + numRetries + ", retryTime=" + + (System.currentTimeMillis() - this.globalStartTime) + "ms", t); t = translateException(t); // translateException throws an exception when we should not retry, i.e. when it's the diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java index dbd4f7d..e502a1b 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestClientNoCluster.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService.Blo import org.apache.hadoop.hbase.util.Bytes; import org.junit.Before; import org.junit.Test; +import org.junit.Ignore; import org.mockito.Mockito; import com.google.protobuf.RpcController; @@ -92,6 +93,36 @@ public class TestClientNoCluster { } /** + * Remove the @Ignore to try out timeout and retry settings + * @throws IOException + */ + @Ignore + @Test + public void testTimeoutAndRetries() throws IOException { + Configuration localConfig = HBaseConfiguration.create(this.conf); + // This override mocks up our exists/get call to throw a RegionServerStoppedException. 
+ localConfig.set("hbase.client.connection.impl", RpcTimeoutConnection.class.getName()); + HTable table = new HTable(localConfig, HConstants.META_TABLE_NAME); + Throwable t = null; + LOG.info("Start"); + try { + // An exists call turns into a get w/ a flag. + table.exists(new Get(Bytes.toBytes("abc"))); + } catch (SocketTimeoutException e) { + // I expect this exception. + LOG.info("Got expected exception", e); + t = e; + } catch (RetriesExhaustedException e) { + // This is the old, unwanted behavior. If we get here FAIL!!! + fail(); + } finally { + table.close(); + } + LOG.info("Stop"); + assertTrue(t != null); + } + + /** * Test that operation timeout prevails over rpc default timeout and retries, etc. * @throws IOException */ @@ -102,7 +133,7 @@ public class TestClientNoCluster { localConfig.set("hbase.client.connection.impl", RpcTimeoutConnection.class.getName()); int pause = 10; localConfig.setInt("hbase.client.pause", pause); - localConfig.setInt("hbase.client.retries.number", 10); + localConfig.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 10); // Set the operation timeout to be < the pause. Expectation is that after first pause, we will // fail out of the rpc because the rpc timeout will have been set to the operation tiemout // and it has expired. Otherwise, if this functionality is broke, all retries will be run -- @@ -263,4 +294,4 @@ public class TestClientNoCluster { return this.stub; } } -} +} \ No newline at end of file diff --git a/hbase-client/src/test/resources/hbase-site.xml b/hbase-client/src/test/resources/hbase-site.xml index ffeb0ef..ab4d1cd 100644 --- a/hbase-client/src/test/resources/hbase-site.xml +++ b/hbase-client/src/test/resources/hbase-site.xml @@ -25,13 +25,4 @@ hbase.defaults.for.version.skip true - - hbase.client.retries.number - 5 - Maximum retries. Used as maximum for all retryable - operations such as fetching of the root region from root region - server, getting a cell's value, starting a row update, etc. - Default: 10. 
- - diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 7001ee9..5cfd340 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -495,7 +495,7 @@ public final class HConstants { * This is a retry backoff multiplier table similar to the BSD TCP syn * backoff table, a bit more aggressive than simple exponential backoff. */ - public static int RETRY_BACKOFF[] = { 1, 1, 1, 2, 2, 4, 4, 8, 16, 32, 64 }; + public static int RETRY_BACKOFF[] = { 1, 10, 100 }; public static final String REGION_IMPL = "hbase.hregion.impl"; @@ -574,7 +574,7 @@ public final class HConstants { /** * Default value of {@link #HBASE_CLIENT_RETRIES_NUMBER}. */ - public static int DEFAULT_HBASE_CLIENT_RETRIES_NUMBER = 20; + public static int DEFAULT_HBASE_CLIENT_RETRIES_NUMBER = 31; /** * Parameter name for client prefetch limit, used as the maximum number of regions @@ -729,7 +729,7 @@ public final class HConstants { public static final boolean DEFAULT_DISALLOW_WRITES_IN_RECOVERING_CONFIG = false; /** Conf key that specifies timeout value to wait for a region ready */ - public static final String LOG_REPLAY_WAIT_REGION_TIMEOUT = + public static final String LOG_REPLAY_WAIT_REGION_TIMEOUT = "hbase.master.log.replay.wait.region.timeout"; /** @@ -796,7 +796,7 @@ public final class HConstants { /* Name of old snapshot directory. 
See HBASE-8352 for details on why it needs to be renamed */ public static final String OLD_SNAPSHOT_DIR_NAME = ".snapshot"; - + /** Temporary directory used for table creation and deletion */ public static final String HBASE_TEMP_DIRECTORY = ".tmp"; diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index 1a4ea23..a630ffe 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -429,7 +429,7 @@ possible configurations would overwhelm and obscure the important. hbase.client.retries.number - 14 + 32 Maximum retries. Used as maximum for all retryable operations such as the getting of a cell's value, starting a row update, etc. Retry interval is a rough function based on hbase.client.pause. At diff --git a/hbase-server/src/test/resources/hbase-site.xml b/hbase-server/src/test/resources/hbase-site.xml index abff5f5..07213b7 100644 --- a/hbase-server/src/test/resources/hbase-site.xml +++ b/hbase-server/src/test/resources/hbase-site.xml @@ -30,25 +30,10 @@ - hbase.client.pause - 1000 - General client pause value. Used mostly as value to wait - before running a retry of a failed get, region lookup, etc. - - hbase.defaults.for.version.skip true - hbase.client.retries.number - 20 - Maximum retries. Used as maximum for all retryable - operations such as fetching of the root region from root region - server, getting a cell's value, starting a row update, etc. - Default: 20. - - - hbase.server.thread.wakefrequency 1000 Time to sleep in between searches for work (in milliseconds).