From 9766e2db768652f6317e3b73bee79d5f2edd56f8 Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Fri, 12 Sep 2014 09:28:27 -0700 Subject: [PATCH 1/2] HBASE-11963 Synchronize peer cluster replication connection attempts (Sukumar Maddineni) Synchronize peer cluster connection attempts to avoid races and to rate-limit connections when multiple replication sources try to connect to the peer cluster. If the peer cluster is down, unsynchronized connection attempts can otherwise grow out of control over time. --- .../hbase/replication/ReplicationPeersZKImpl.java | 40 +++++++++++++++------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java index b7a6447..df0e385 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java @@ -296,17 +296,25 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re if (peer == null) { return Collections.emptyList(); } - List addresses; - try { - addresses = fetchSlavesAddresses(peer.getZkw()); - } catch (KeeperException ke) { - if (LOG.isDebugEnabled()) { - LOG.debug("Fetch salves addresses failed.", ke); + // Synchronize peer cluster connection attempts to avoid races and rate + // limit connections when multiple replication sources try to connect to + // the peer cluster. If the peer cluster is down we can get out of control + // over time. 
+ synchronized (peer) { + List addresses; + try { + addresses = fetchSlavesAddresses(peer.getZkw()); + } + catch (KeeperException ke) { + if (LOG.isDebugEnabled()) { + LOG.debug("Fetch salves addresses failed.", ke); + } + reconnectPeer(ke, peer); + addresses = Collections.emptyList(); } - reconnectPeer(ke, peer); - addresses = Collections.emptyList(); + peer.setRegionServers(addresses); } - peer.setRegionServers(addresses); + return peer.getRegionServers(); } @@ -317,10 +325,16 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re return null; } UUID peerUUID = null; - try { - peerUUID = ZKClusterId.getUUIDForCluster(peer.getZkw()); - } catch (KeeperException ke) { - reconnectPeer(ke, peer); + // Synchronize peer cluster connection attempts to avoid races and rate + // limit connections when multiple replication sources try to connect to + // the peer cluster. If the peer cluster is down we can get out of control + // over time. + synchronized (peer) { + try { + peerUUID = ZKClusterId.getUUIDForCluster(peer.getZkw()); + } catch (KeeperException ke) { + reconnectPeer(ke, peer); + } } return peerUUID; } -- 1.8.5.2 (Apple Git-48)