From 21a386f223594ba5ff77c29e8f320e8caa1e9e47 Mon Sep 17 00:00:00 2001 From: khemani Date: Mon, 26 Mar 2012 13:20:01 -0700 Subject: [PATCH] [HBASE-5606] SplitLogManager async delete node hangs log splitting when ZK connection is lost --- .../hadoop/hbase/master/SplitLogManager.java | 24 ++++++++++++++++--- 1 files changed, 20 insertions(+), 4 deletions(-) diff --git src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java index 5d94ea9..a402f94 100644 --- src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java +++ src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java @@ -404,6 +404,14 @@ public class SplitLogManager extends ZooKeeperListener { tot_mgr_get_data_queued.incrementAndGet(); } + private void tryGetDataSetWatch(String path) { + // A negative retry count will lead to ignoring all error processing. + this.watcher.getRecoverableZooKeeper().getZooKeeper(). + getData(path, this.watcher, + new GetDataAsyncCallback(), new Long(-1) /* retry count */); + tot_mgr_get_data_queued.incrementAndGet(); + } + private void getDataSetWatchSuccess(String path, byte[] data, int version) { if (data == null) { if (version == Integer.MIN_VALUE) { @@ -916,11 +924,13 @@ public class SplitLogManager extends ZooKeeperListener { for (Map.Entry e : tasks.entrySet()) { String path = e.getKey(); Task task = e.getValue(); - // we have to do this check again because tasks might have - // been asynchronously assigned. - if (task.isUnassigned()) { + // we have to do task.isUnassigned() check again because tasks might + // have been asynchronously assigned. There is no locking required + // for these checks ... 
it is OK even if tryGetDataSetWatch() is + // called unnecessarily for a task + if (task.isUnassigned() && (task.status != FAILURE)) { // We just touch the znode to make sure its still there - getDataSetWatch(path, zkretries); + tryGetDataSetWatch(path); } } createRescanNode(Long.MAX_VALUE); @@ -990,6 +1000,12 @@ public class SplitLogManager extends ZooKeeperListener { return; } Long retry_count = (Long) ctx; + + if (retry_count < 0) { + LOG.warn("getdata rc = " + KeeperException.Code.get(rc) + " " + + path + ". Ignoring error. No error handling. No retrying."); + return; + } LOG.warn("getdata rc = " + KeeperException.Code.get(rc) + " " + path + " remaining retries=" + retry_count); if (retry_count == 0) { -- 1.7.7.2