Index: conf/hbase-default.xml
===================================================================
--- conf/hbase-default.xml (revision 680847)
+++ conf/hbase-default.xml (working copy)
@@ -112,11 +112,11 @@
hbase.client.retries.number
- 5
+ 10
Maximum retries. Used as maximum for all retryable
operations such as fetching of the root region from root region
server, getting a cell's value, starting a row update, etc.
- Default: 5.
+ Default: 10.
Index: src/java/org/apache/hadoop/hbase/HConstants.java
===================================================================
--- src/java/org/apache/hadoop/hbase/HConstants.java (revision 680847)
+++ src/java/org/apache/hadoop/hbase/HConstants.java (working copy)
@@ -226,4 +226,10 @@
public static final String NAME = "NAME";
public static final String VERSIONS = "VERSIONS";
public static final String IN_MEMORY = "IN_MEMORY";
+
+ /**
+ * Retry backoff multiplier table, similar to the BSD TCP syn backoff table
+ * (flat at first, then doubling). Indexed by retry attempt; do not modify.
+ */
+ public static final int[] RETRY_BACKOFF = { 1, 1, 1, 1, 2, 4, 8, 16, 32, 64 };
}
\ No newline at end of file
Index: src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
===================================================================
--- src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 680847)
+++ src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy)
@@ -163,14 +163,20 @@
"Unable to find region server interface " + serverClassName, e);
}
- this.pause = conf.getLong("hbase.client.pause", 30 * 1000);
- this.numRetries = conf.getInt("hbase.client.retries.number", 5);
+ this.pause = conf.getLong("hbase.client.pause", 10 * 1000);
+ this.numRetries = conf.getInt("hbase.client.retries.number", 10);
this.maxRPCAttempts = conf.getInt("hbase.client.rpc.maxattempts", 1);
this.master = null;
this.masterChecked = false;
}
-
+
+ private long getPauseTime(int tries) { // scale the base pause by the backoff table entry
+   // Attempts past the end of the table reuse its last multiplier.
+   final int slot = Math.min(tries, HConstants.RETRY_BACKOFF.length - 1);
+   return this.pause * HConstants.RETRY_BACKOFF[slot];
+ }
+
/** {@inheritDoc} */
public HMasterInterface getMaster() throws MasterNotRunningException {
HServerAddress masterLocation = null;
@@ -199,13 +205,14 @@
break;
}
LOG.info("Attempt " + tries + " of " + this.numRetries +
- " failed with <" + e + ">. Retrying after sleep of " + this.pause);
+ " failed with <" + e + ">. Retrying after sleep of " +
+ getPauseTime(tries));
}
// We either cannot connect to master or it is not running. Sleep & retry
try {
- Thread.sleep(this.pause);
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e) {
// continue
}
@@ -578,7 +585,7 @@
}
try{
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e){
// continue
}
@@ -795,7 +802,7 @@
if (LOG.isDebugEnabled()) {
LOG.debug("Sleeping. Waiting for root region.");
}
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
if (LOG.isDebugEnabled()) {
LOG.debug("Wake. Retry finding root region.");
}
@@ -837,7 +844,7 @@
if (LOG.isDebugEnabled()) {
LOG.debug("Root region location changed. Sleeping.");
}
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
if (LOG.isDebugEnabled()) {
LOG.debug("Wake. Retry finding root region.");
}
@@ -890,7 +897,7 @@
}
}
try {
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e) {
// continue
}
Index: src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
===================================================================
--- src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 680847)
+++ src/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy)
@@ -126,6 +126,12 @@
return this.connection.listTables();
}
+ private long getPauseTime(int tries) { // base pause times the per-attempt backoff multiplier
+   final int[] backoff = HConstants.RETRY_BACKOFF;
+   // Clamp so that attempts beyond the table length use the final entry.
+   return this.pause * backoff[tries < backoff.length ? tries : backoff.length - 1];
+ }
+
/**
* Creates a new table
*
@@ -155,7 +161,7 @@
}
}
try {
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e) {
// continue
}
@@ -274,7 +280,7 @@
}
try {
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e) {
// continue
}
@@ -320,14 +326,15 @@
// Wait until all regions are enabled
- while (!isTableEnabled(tableName)) {
+ for (int tries = 0;
+ (tries < numRetries) && (!isTableEnabled(tableName));
+ tries++) {
if (LOG.isDebugEnabled()) {
LOG.debug("Sleep. Waiting for all regions to be enabled from " +
Bytes.toString(tableName));
}
try {
- Thread.sleep(pause);
-
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e) {
// continue
}
@@ -336,6 +343,9 @@
Bytes.toString(tableName));
}
}
+ if (!isTableEnabled(tableName))
+ throw new IOException("unable to enable table " +
+ Bytes.toString(tableName));
LOG.info("Enabled table " + Bytes.toString(tableName));
}
@@ -379,13 +389,15 @@
}
// Wait until all regions are disabled
- while (isTableEnabled(tableName)) {
+ for (int tries = 0;
+ (tries < numRetries) && (isTableEnabled(tableName));
+ tries++) {
if (LOG.isDebugEnabled()) {
LOG.debug("Sleep. Waiting for all regions to be disabled from " +
Bytes.toString(tableName));
}
try {
- Thread.sleep(pause);
+ Thread.sleep(getPauseTime(tries));
} catch (InterruptedException e) {
// continue
}
@@ -394,6 +406,9 @@
Bytes.toString(tableName));
}
}
+ if (isTableEnabled(tableName))
+ throw new IOException("unable to disable table " +
+ Bytes.toString(tableName));
LOG.info("Disabled " + Bytes.toString(tableName));
}