From e50a94d35ab12aa318f0d7dea30976efcfd9a9e1 Mon Sep 17 00:00:00 2001 From: Yi Liang Date: Tue, 17 Oct 2017 15:04:11 -0700 Subject: [PATCH] HBASE-18984: [AMv2] Retain assignment does not work well in AMv2 --- .../hbase/master/assignment/AssignProcedure.java | 6 ++++++ .../hbase/master/assignment/AssignmentManager.java | 22 ++++++++++++++++------ .../hbase/master/assignment/RegionStateStore.java | 7 ++++--- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 3eba571..4e62f2b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -183,6 +183,12 @@ public class AssignProcedure extends RegionTransitionProcedure { return false; } + if (this.targetServer != null && !isServerOnline(env, targetServer)) { + LOG.info("Can not assign region to a dead target RegionServer:" + + targetServer.getServerName()); + return false; + } + // Send assign (add into assign-pool). Region is now in OFFLINE state. Setting offline state // scrubs what was the old region location. Setting a new regionLocation here is how we retain // old assignment or specify target server if a move or merge. See diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 8bdf4d5..852bed6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1434,9 +1434,13 @@ public class AssignmentManager implements ServerListener { synchronized (regionNode) { State state = regionNode.transitionState(State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN); regionStates.addRegionToServer(regionNode.getRegionLocation(), regionNode); - regionStateStore.updateRegionLocation(regionNode.getRegionInfo(), state, - regionNode.getRegionLocation(), regionNode.getLastHost(), HConstants.NO_SEQNUM, - regionNode.getProcedure().getProcId()); + // No need to update meta here, and this is the only place we set regionNode as OPENING + // If master crashed before #markRegionAsOpened() (where we set region as OPEN in meta) + // And after master is restarted, this region maybe reloaded and assigned when master do bulk assign. + // So there are might be 2 AssignProcedures for this region. One is created by master when bulk + // assign, the other is the one failed before master crashed. But there can only be one + // AssignProcedure per region running at a time since each procedure takes a lock on the region. + // No matter which one AssignProcedure succeed, the the other will fail, and wont affect others. } // update the operation count metrics @@ -1450,7 +1454,7 @@ public class AssignmentManager implements ServerListener { opening = true; regionStates.removeRegionFromServer(regionNode.getRegionLocation(), regionNode); } - // Should we update hbase:meta? + // No need to undo update meta, since we do not update meta at markRegionAsOpening() } if (opening) { // TODO: Metrics. Do opposite of metrics.incrementOperationCounter(); @@ -1467,8 +1471,6 @@ public class AssignmentManager implements ServerListener { setMetaInitialized(hri, true); } regionStates.addRegionToServer(regionNode.getRegionLocation(), regionNode); - // TODO: OPENING Updates hbase:meta too... we need to do both here and there? - // That is a lot of hbase:meta writing. regionStateStore.updateRegionLocation(regionNode.getRegionInfo(), state, regionNode.getRegionLocation(), regionNode.getLastHost(), regionNode.getOpenSeqNum(), regionNode.getProcedure().getProcId()); @@ -1485,6 +1487,14 @@ public class AssignmentManager implements ServerListener { setMetaInitialized(hri, false); } regionStates.addRegionToServer(regionNode.getRegionLocation(), regionNode); + // we need to update CLOSING in hbase:meta in this case. + // Let's consider when master crashed before markRegionAsClosed()(where we set region as closed). + // After master restart, this region reloaded, and may have States other than CLOSE and OFFLINE. + // If this region has OPEN state, it will be assigned again by bulk assign. So, there are + // both AssignProcedure and UnAssignProcedure for the same region. + // Since only one RegionTransitionProcedure per region running at a time, there are 2 situations: + // 1. AssignProcedure run first, the region will be eventually unassigned. => correct + // 2. UnAssignProcedure run first, this region will be assigned as OPEN. => wrong regionStateStore.updateRegionLocation(regionNode.getRegionInfo(), state, regionNode.getRegionLocation(), regionNode.getLastHost(), HConstants.NO_SEQNUM, regionNode.getProcedure().getProcId()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java index f9a1b43..fe8ac35 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java @@ -124,9 +124,10 @@ public class RegionStateStore { final ServerName regionLocation = getRegionServer(result, replicaId); final long openSeqNum = -1; - // TODO: move under trace, now is visible for debugging - LOG.info(String.format("Load hbase:meta entry region=%s regionState=%s lastHost=%s regionLocation=%s", - regionInfo, state, lastHost, regionLocation)); + if (LOG.isTraceEnabled()) { + LOG.info(String.format("Load hbase:meta entry region=%s regionState=%s lastHost=%s regionLocation=%s", + regionInfo, state, lastHost, regionLocation)); + } visitor.visitRegionState(regionInfo, state, regionLocation, lastHost, openSeqNum); } -- 2.10.1