Index: src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (revision 1204937)
+++ src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (working copy)
@@ -103,7 +103,7 @@
public enum EventType {
// Messages originating from RS (NOTE: there is NO direct communication from
// RS to Master). These are a result of RS updates into ZK.
- RS_ZK_REGION_CLOSING (1), // RS is in process of closing a region
+ // RS_ZK_REGION_CLOSING (1) was removed; it is replaced by M_ZK_REGION_CLOSING (51), see HBASE-4739
RS_ZK_REGION_CLOSED (2), // RS has finished closing a region
RS_ZK_REGION_OPENING (3), // RS is in process of opening a region
RS_ZK_REGION_OPENED (4), // RS has finished opening a region
@@ -132,6 +132,7 @@
// Updates from master to ZK. This is done by the master and there is
// nothing to process by either Master or RS
M_ZK_REGION_OFFLINE (50), // Master adds this region as offline in ZK
+ M_ZK_REGION_CLOSING (51), // Master adds this region as closing in ZK
// Master controlled events to be executed on the master
M_SERVER_SHUTDOWN (70), // Master is processing shutdown of a RS
Index: src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java (revision 1204937)
+++ src/main/java/org/apache/hadoop/hbase/executor/RegionTransitionData.java (working copy)
@@ -83,7 +83,7 @@
*
*
Used when the server name is known (a regionserver is setting it).
*
- *
Valid types for this constructor are {@link EventType#RS_ZK_REGION_CLOSING},
+ *
Valid types for this constructor are {@link EventType#M_ZK_REGION_CLOSING},
* {@link EventType#RS_ZK_REGION_CLOSED}, {@link EventType#RS_ZK_REGION_OPENING},
* {@link EventType#RS_ZK_REGION_SPLITTING},
* and {@link EventType#RS_ZK_REGION_OPENED}.
@@ -127,7 +127,7 @@
*
One of:
*
* - {@link EventType#M_ZK_REGION_OFFLINE}
- *
- {@link EventType#RS_ZK_REGION_CLOSING}
+ *
- {@link EventType#M_ZK_REGION_CLOSING}
*
- {@link EventType#RS_ZK_REGION_CLOSED}
*
- {@link EventType#RS_ZK_REGION_OPENING}
*
- {@link EventType#RS_ZK_REGION_OPENED}
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1204937)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy)
@@ -455,7 +455,7 @@
" in state " + data.getEventType());
synchronized (regionsInTransition) {
switch (data.getEventType()) {
- case RS_ZK_REGION_CLOSING:
+ case M_ZK_REGION_CLOSING:
// If zk node of the region was updated by a live server skip this
// region and just add it into RIT.
if (isOnDeadServer(regionInfo, deadServers) &&
@@ -681,7 +681,7 @@
regionState.getRegion(), sn, daughters));
break;
- case RS_ZK_REGION_CLOSING:
+ case M_ZK_REGION_CLOSING:
// Should see CLOSING after we have asked it to CLOSE or additional
// times after already being in state of CLOSING
if (regionState == null ||
@@ -1751,13 +1751,12 @@
}
state = new RegionState(region, RegionState.State.PENDING_CLOSE);
regionsInTransition.put(encodedName, state);
- } else if (force && state.isPendingClose()) {
- // JD 05/25/11
- // in my experience this is useless, when this happens it just spins
- debugLog(region, "Attempting to unassign region " +
- region.getRegionNameAsString() + " which is already pending close "
- + "but forcing an additional close");
- state.update(RegionState.State.PENDING_CLOSE);
+ } else if (force && (state.isPendingClose() || state.isClosing())) {
+ debugLog(region,
+ "Attempting to unassign region " + region.getRegionNameAsString() +
+ " which is already " + state.getState() +
+ " but forcing to send a CLOSE RPC again ");
+ state.update(state.getState());
} else {
debugLog(region, "Attempting to unassign region " +
region.getRegionNameAsString() + " but it is " +
@@ -1806,6 +1805,11 @@
}
}
}
+ // The RS is already processing this region, so we only need to refresh the region state's timestamp
+ if (t instanceof RegionAlreadyInTransitionException) {
+ debugLog(region, "update " + state + " the timestamp.");
+ state.update(state.getState());
+ }
}
LOG.info("Server " + server + " returned " + t + " for " +
region.getEncodedName());
@@ -2530,26 +2534,13 @@
LOG.info("Region has been PENDING_CLOSE for too "
+ "long, running forced unassign again on region="
+ regionInfo.getRegionNameAsString());
- try {
- // If the server got the RPC, it will transition the node
- // to CLOSING, so only do something here if no node exists
- if (!ZKUtil.watchAndCheckExists(watcher,
- ZKAssign.getNodeName(watcher, regionInfo.getEncodedName()))) {
- // Queue running of an unassign -- do actual unassign
- // outside of the regionsInTransition lock.
- invokeUnassign(regionInfo);
- }
- } catch (NoNodeException e) {
- LOG.debug("Node no longer existed so not forcing another "
- + "unassignment");
- } catch (KeeperException e) {
- LOG.warn("Unexpected ZK exception timing out a region close", e);
- }
+ invokeUnassign(regionInfo);
break;
case CLOSING:
LOG.info("Region has been CLOSING for too " +
"long, this should eventually complete or the server will " +
- "expire, doing nothing");
+ "expire, send RPC again");
+ invokeUnassign(regionInfo);
break;
}
}
Index: src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java (revision 1204937)
+++ src/main/java/org/apache/hadoop/hbase/master/UnAssignCallable.java (working copy)
@@ -40,7 +40,7 @@
@Override
public Object call() throws Exception {
- assignmentManager.unassign(hri);
+ assignmentManager.unassign(hri, true);
return null;
}
}
Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (revision 1204937)
+++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (working copy)
@@ -287,7 +287,7 @@
// Because these are already executed states.
if (hijack && null != curDataInZNode) {
EventType eventType = curDataInZNode.getEventType();
- if (eventType.equals(EventType.RS_ZK_REGION_CLOSING)
+ if (eventType.equals(EventType.M_ZK_REGION_CLOSING)
|| eventType.equals(EventType.RS_ZK_REGION_CLOSED)
|| eventType.equals(EventType.RS_ZK_REGION_OPENED)) {
return -1;
@@ -423,7 +423,7 @@
HRegionInfo region)
throws KeeperException, KeeperException.NoNodeException {
String regionName = region.getEncodedName();
- return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSING);
+ return deleteNode(zkw, regionName, EventType.M_ZK_REGION_CLOSING);
}
/**
@@ -562,7 +562,7 @@
region.getEncodedName() + " in a CLOSING state"));
RegionTransitionData data = new RegionTransitionData(
- EventType.RS_ZK_REGION_CLOSING, region.getRegionName(), serverName);
+ EventType.M_ZK_REGION_CLOSING, region.getRegionName(), serverName);
String node = getNodeName(zkw, region.getEncodedName());
return ZKUtil.createAndWatch(zkw, node, data.getBytes());
@@ -598,7 +598,7 @@
HRegionInfo region, ServerName serverName, int expectedVersion)
throws KeeperException {
return transitionNode(zkw, region, serverName,
- EventType.RS_ZK_REGION_CLOSING,
+ EventType.M_ZK_REGION_CLOSING,
EventType.RS_ZK_REGION_CLOSED, expectedVersion);
}
Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (revision 1204937)
+++ src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (working copy)
@@ -378,11 +378,13 @@
// Let's just assign everything to first RS
HRegionServer hrs = cluster.getRegionServer(0);
ServerName serverName = hrs.getServerName();
-
+ HRegionInfo closingRegion = enabledRegions.remove(0);
// we'll need some regions to already be assigned out properly on live RS
List enabledAndAssignedRegions = new ArrayList();
enabledAndAssignedRegions.add(enabledRegions.remove(0));
enabledAndAssignedRegions.add(enabledRegions.remove(0));
+ enabledAndAssignedRegions.add(closingRegion);
+
List disabledAndAssignedRegions = new ArrayList();
disabledAndAssignedRegions.add(disabledRegions.remove(0));
disabledAndAssignedRegions.add(disabledRegions.remove(0));
@@ -436,24 +438,9 @@
/*
* ZK = CLOSING
*/
+ regionsThatShouldBeOnline.add(closingRegion);
+ ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
-// Disabled test of CLOSING. This case is invalid after HBASE-3181.
-// How can an RS stop a CLOSING w/o deleting the node? If it did ever fail
-// and left the node in CLOSING, the RS would have aborted and we'd process
-// these regions in server shutdown
-//
-// // Region of enabled table being closed but not complete
-// // Region is already assigned, don't say anything to RS but set ZK closing
-// region = enabledAndAssignedRegions.remove(0);
-// regionsThatShouldBeOnline.add(region);
-// ZKAssign.createNodeClosing(zkw, region, serverName);
-//
-// // Region of disabled table being closed but not complete
-// // Region is already assigned, don't say anything to RS but set ZK closing
-// region = disabledAndAssignedRegions.remove(0);
-// regionsThatShouldBeOffline.add(region);
-// ZKAssign.createNodeClosing(zkw, region, serverName);
-
/*
* ZK = CLOSED
*/