diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 4f5edf7a3e..dc888a2d2a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1515,8 +1515,6 @@ public class HMaster extends HRegionServer implements MasterServices { this.assignmentManager.stop(); } - stopProcedureExecutor(); - if (this.walManager != null) { this.walManager.stop(); } @@ -1564,7 +1562,9 @@ public class HMaster extends HRegionServer implements MasterServices { procedureExecutor.startWorkers(); } - private void stopProcedureExecutor() { + @Override + protected void stopProcedureExecutorAndStore() { + super.stopProcedureExecutorAndStore(); if (procedureExecutor != null) { configurationManager.deregisterObserver(procedureExecutor.getEnvironment()); procedureExecutor.getEnvironment().getRemoteDispatcher().stop(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 0b853b91b1..3fa2e8b7f3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1056,6 +1056,15 @@ public class HRegionServer extends HasThread implements } } + // First, shut down RPC services so we minimize race potential if we respond to requests + // in the middle of abort, from partial/invalid state. + if (this.rpcServices != null) { + this.rpcServices.stop(); + } + + // Then, shut down procedures for the same reason; avoid persisting procedures in bad state. 
+ stopProcedureExecutorAndStore(); + if (this.leases != null) { this.leases.closeAfterLeasesExpire(); } @@ -1087,7 +1096,6 @@ public class HRegionServer extends HasThread implements if (this.hMemManager != null) this.hMemManager.stop(); if (this.cacheFlusher != null) this.cacheFlusher.interruptIfNecessary(); if (this.compactSplitThread != null) this.compactSplitThread.interruptIfNecessary(); - sendShutdownInterrupt(); // Stop the snapshot and other procedure handlers, forcefully killing all running tasks if (rspmHost != null) { @@ -1167,10 +1175,6 @@ public class HRegionServer extends HasThread implements stopServiceThreads(); } - if (this.rpcServices != null) { - this.rpcServices.stop(); - } - try { deleteMyEphemeralNode(); } catch (KeeperException.NoNodeException nn) { @@ -2394,6 +2398,10 @@ public class HRegionServer extends HasThread implements return rpcServices; } + protected void stopProcedureExecutorAndStore() { + // No-op in RS; there's no procedure store so we don't have to shut the executor down early. + } + /** * Cause the server to exit without closing the regions it is serving, the log * it is using and without notifying the master. Used unit testing and on @@ -2469,12 +2477,6 @@ public class HRegionServer extends HasThread implements abort("Simulated kill"); } - /** - * Called on stop/abort before closing the cluster connection and meta locator. - */ - protected void sendShutdownInterrupt() { - } - /** * Wait on all threads to finish. Presumption is that all closes and stops * have already been called.