diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java index 2529905..1d78687 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java @@ -59,6 +59,16 @@ public class FSHDFSUtils extends FSUtils { // usually needs 10 minutes before marking the nodes as dead. So we're putting ourselves // beyond that limit 'to be safe'. long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 900000) + startWaiting; + // The retry interval needs to be chosen such that lease recovery can occur within this + // interval. For example, if the underlying hdfs cluster has dfs.socket.timeout = 60s + // which is the default value, lease recovery will take at least 60 seconds. This is because + // the primary datanode will try to recover the WAL block being written to, from all 3 + // data nodes including the datanode which is currently dead (see HDFS 4721). This will + // time out after 60 seconds. Hence, retrying faster than 60 seconds will cause lease recoveries + // to continue accumulating on the namenode. Even if a recovery succeeds, it will be + // preempted by a subsequent recovery. This will continue until + // recoveryTimeout above kicks in. + int retryInterval = conf.getInt("hbase.lease.recovery.retry.interval", 61000); boolean recovered = false; int nbAttempt = 0; while (!recovered) { @@ -91,7 +101,11 @@ public class FSHDFSUtils extends FSUtils { " - continuing without the lease, but we could have a data loss."); } else { try { - Thread.sleep(nbAttempt < 3 ? 500 : 1000); + // Note that the namenode will choose the dead data node as the primary node the first + // time it initiates the recovery (see HDFS 4721) - dead meaning non-responsive for + // a few seconds but not yet declared dead by the namenode. 
Hence, the first recovery + // will never truly occur, so we do aggressive retries the first couple of times. + Thread.sleep(nbAttempt < 3 ? 900 : retryInterval); } catch (InterruptedException ie) { InterruptedIOException iioe = new InterruptedIOException(); iioe.initCause(ie);