From 815bc3e75e9cbb6dfba98d39d7ba4bf11c1b2066 Mon Sep 17 00:00:00 2001
From: Bahram Chehrazy
Date: Fri, 3 May 2019 15:28:36 -0700
Subject: [PATCH] Assign and close event handlers should remove region from the
 online list when exceptions are thrown.

AssignRegionHandler.handleException() now calls cleanUpAndReportFailure()
for any IOException (including subclasses) before aborting the server, and
logs the cause if that cleanup itself fails.

CloseRegionHandler.process() now removes the region from the online list in
its finally block, so the removal happens whether or not the close
succeeded. The removal is skipped when the region lookup returned null
(this server is not serving the region), so the finally block still reaches
the regions-in-transition cleanup on that path.
---
Reviewer notes (this section is not part of the commit message):
 * Line breaks and indentation were reconstructed from a
   whitespace-collapsed copy of the patch. Context lines assume a two-space
   indent; use `git apply --ignore-whitespace` if a hunk is rejected.
 * The index hashes below are carried over from the original patch.

 .../handler/AssignRegionHandler.java          |  8 ++++++++
 .../handler/CloseRegionHandler.java           | 20 ++++++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java
index a978d72d2b..b3f3eee470 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java
@@ -154,6 +154,14 @@ public class AssignRegionHandler extends EventHandler {
   protected void handleException(Throwable t) {
     LOG.warn("Fatal error occurred while opening region {}, aborting...",
       regionInfo.getRegionNameAsString(), t);
+    // Best-effort cleanup before aborting; instanceof also covers IOException subclasses.
+    if (t instanceof IOException) {
+      try {
+        cleanUpAndReportFailure((IOException) t);
+      } catch (IOException e) {
+        LOG.warn("Clean up failed! Continuing on abort anyways.", e);
+      }
+    }
     getServer().abort(
       "Failed to open region " + regionInfo.getRegionNameAsString() + " and can not recover", t);
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
index d4ea004cb2..0721705cd0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java
@@ -93,14 +93,13 @@ public class CloseRegionHandler extends EventHandler {
 
   @Override
   public void process() {
+    String encodedRegionName = regionInfo.getEncodedName();
+    LOG.trace("Processing close of {}", encodedRegionName);
+    // Check that this region is being served here
+    HRegion region = (HRegion)rsServices.getRegion(encodedRegionName);
     try {
-      String name = regionInfo.getEncodedName();
-      LOG.trace("Processing close of {}", name);
-      String encodedRegionName = regionInfo.getEncodedName();
-      // Check that this region is being served here
-      HRegion region = (HRegion)rsServices.getRegion(encodedRegionName);
       if (region == null) {
-        LOG.warn("Received CLOSE for region {} but currently not serving - ignoring", name);
+        LOG.warn("Received CLOSE for region {} but currently not serving - ignoring", encodedRegionName);
         // TODO: do better than a simple warning
         return;
       }
@@ -110,7 +109,7 @@ public class CloseRegionHandler extends EventHandler {
         if (region.close(abort) == null) {
           // This region got closed. Most likely due to a split.
           // The split message will clean up the master state.
-          LOG.warn("Can't close region {}, was already closed during close()", name);
+          LOG.warn("Can't close region {}, was already closed during close()", encodedRegionName);
           return;
         }
       } catch (IOException ioe) {
@@ -122,13 +121,18 @@ public class CloseRegionHandler extends EventHandler {
         throw new RuntimeException(ioe);
       }
 
-      this.rsServices.removeRegion(region, destination);
       rsServices.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.CLOSED,
         HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo));
 
       // Done! Region is closed on this RS
       LOG.debug("Closed " + region.getRegionInfo().getRegionNameAsString());
     } finally {
+      // Remove this region from the online and RIT lists regardless of success or failure: on
+      // failure we abort anyway and must not retry the close and block the shutdown.
+      // region is null when this server was not serving it, so there is nothing to remove then.
+      if (region != null) {
+        this.rsServices.removeRegion(region, destination);
+      }
       this.rsServices.getRegionsInTransitionInRS().
         remove(this.regionInfo.getEncodedNameAsBytes(), Boolean.FALSE);
     }
-- 
2.20.1.windows.1