Index: conf/hbase-default.xml =================================================================== --- conf/hbase-default.xml (revision 680847) +++ conf/hbase-default.xml (working copy) @@ -112,11 +112,11 @@ hbase.client.retries.number - 5 + 10 Maximum retries. Used as maximum for all retryable operations such as fetching of the root region from root region server, getting a cell's value, starting a row update, etc. - Default: 5. + Default: 10. Index: src/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- src/java/org/apache/hadoop/hbase/HConstants.java (revision 680847) +++ src/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -226,4 +226,10 @@ public static final String NAME = "NAME"; public static final String VERSIONS = "VERSIONS"; public static final String IN_MEMORY = "IN_MEMORY"; + + /** + * This is a retry backoff multiplier table similar to the BSD TCP syn + * backoff table, a bit more aggressive than simple exponential backoff. + */ + public static int RETRY_BACKOFF[] = { 1, 1, 1, 1, 2, 4, 8, 16, 32, 64 }; } \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 680847) +++ src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -163,14 +163,20 @@ "Unable to find region server interface " + serverClassName, e); } - this.pause = conf.getLong("hbase.client.pause", 30 * 1000); - this.numRetries = conf.getInt("hbase.client.retries.number", 5); + this.pause = conf.getLong("hbase.client.pause", 10 * 1000); + this.numRetries = conf.getInt("hbase.client.retries.number", 10); this.maxRPCAttempts = conf.getInt("hbase.client.rpc.maxattempts", 1); this.master = null; this.masterChecked = false; } - + + private long getPauseTime(int tries) { + if (tries >= HConstants.RETRY_BACKOFF.length) + tries = HConstants.RETRY_BACKOFF.length - 1; + return this.pause * HConstants.RETRY_BACKOFF[tries]; + } + /** {@inheritDoc} */ public HMasterInterface getMaster() throws MasterNotRunningException { HServerAddress masterLocation = null; @@ -199,13 +205,14 @@ break; } LOG.info("Attempt " + tries + " of " + this.numRetries + - " failed with <" + e + ">. Retrying after sleep of " + this.pause); + " failed with <" + e + ">. Retrying after sleep of " + + getPauseTime(tries)); } // We either cannot connect to master or it is not running. Sleep & retry try { - Thread.sleep(this.pause); + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e) { // continue } @@ -578,7 +585,7 @@ } try{ - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e){ // continue } @@ -795,7 +802,7 @@ if (LOG.isDebugEnabled()) { LOG.debug("Sleeping. Waiting for root region."); } - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); if (LOG.isDebugEnabled()) { LOG.debug("Wake. Retry finding root region."); } @@ -837,7 +844,7 @@ if (LOG.isDebugEnabled()) { LOG.debug("Root region location changed. Sleeping."); } - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); if (LOG.isDebugEnabled()) { LOG.debug("Wake. Retry finding root region."); } @@ -890,7 +897,7 @@ } } try { - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e) { // continue } Index: src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java =================================================================== --- src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 680847) +++ src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy) @@ -126,6 +126,12 @@ return this.connection.listTables(); } + private long getPauseTime(int tries) { + if (tries >= HConstants.RETRY_BACKOFF.length) + tries = HConstants.RETRY_BACKOFF.length - 1; + return this.pause * HConstants.RETRY_BACKOFF[tries]; + } + /** * Creates a new table * @@ -155,7 +161,7 @@ } } try { - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e) { // continue } @@ -274,7 +280,7 @@ } try { - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e) { // continue } @@ -320,14 +326,15 @@ // Wait until all regions are enabled - while (!isTableEnabled(tableName)) { + for (int tries = 0; + (tries < numRetries) && (!isTableEnabled(tableName)); + tries++) { if (LOG.isDebugEnabled()) { LOG.debug("Sleep. Waiting for all regions to be enabled from " + Bytes.toString(tableName)); } try { - Thread.sleep(pause); - + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e) { // continue } @@ -336,6 +343,9 @@ Bytes.toString(tableName)); } } + if (!isTableEnabled(tableName)) + throw new IOException("unable to enable table " + + Bytes.toString(tableName)); LOG.info("Enabled table " + Bytes.toString(tableName)); } @@ -379,13 +389,15 @@ } // Wait until all regions are disabled - while (isTableEnabled(tableName)) { + for (int tries = 0; + (tries < numRetries) && (isTableEnabled(tableName)); + tries++) { if (LOG.isDebugEnabled()) { LOG.debug("Sleep. Waiting for all regions to be disabled from " + Bytes.toString(tableName)); } try { - Thread.sleep(pause); + Thread.sleep(getPauseTime(tries)); } catch (InterruptedException e) { // continue } @@ -394,6 +406,9 @@ Bytes.toString(tableName)); } } + if (isTableEnabled(tableName)) + throw new IOException("unable to disable table " + + Bytes.toString(tableName)); LOG.info("Disabled " + Bytes.toString(tableName)); }