diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java index 08097593a4..9ff5445af1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java @@ -86,6 +86,11 @@ public abstract class RegionRemoteProcedureBase extends Procedure event = regionNode.getProcedureEvent(); if (event.isReady()) { LOG.warn( diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java index 638f9d3461..9693a1569e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java @@ -173,6 +173,8 @@ public class RSProcedureDispatcher } protected boolean scheduleForRetry(final IOException e) { + // TODO: ServerNotRunningYetException (SNRYE) is often produced by a server that restarted in place of a crashed + // server with a different start code. We should avoid pointless retries by verifying the start code here. // Should we wait a little before retrying? If the server is starting it's yes. 
final boolean hold = (e instanceof ServerNotRunningYetException); if (hold) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index efa16082f2..9224e04b4c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -1605,15 +1605,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, try { checkOpen(); - if (request.hasServerStartCode()) { - // check that we are the same server that this RPC is intended for. - long serverStartCode = request.getServerStartCode(); - if (regionServer.serverName.getStartcode() != serverStartCode) { - throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " + - "different server with startCode: " + serverStartCode + ", this server is: " - + regionServer.serverName)); - } - } + throwOnWrongStartCode(request); final String encodedRegionName = ProtobufUtil.getRegionEncodedName(request.getRegion()); requestCount.increment(); @@ -1922,6 +1914,32 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } } + private void throwOnWrongStartCode(OpenRegionRequest request) throws ServiceException { + if (!request.hasServerStartCode()) { + LOG.warn("OpenRegionRequest for {} does not have a start code", request.getOpenInfoList()); + return; + } + throwOnWrongStartCode(request.getServerStartCode()); + } + + private void throwOnWrongStartCode(CloseRegionRequest request) throws ServiceException { + if (!request.hasServerStartCode()) { + LOG.warn("CloseRegionRequest for {} does not have a start code", request.getRegion()); + return; + } + throwOnWrongStartCode(request.getServerStartCode()); + } + + private void throwOnWrongStartCode(long serverStartCode) throws ServiceException { + // check that we are the same server that this RPC is intended for. 
+ if (regionServer.serverName.getStartcode() != serverStartCode) { + throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " + + "different server with startCode: " + serverStartCode + ", this server is: " + + regionServer.serverName)); + } + } + + /** * Open asynchronously a region or a set of regions on the region server. * @@ -1950,15 +1968,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, public OpenRegionResponse openRegion(final RpcController controller, final OpenRegionRequest request) throws ServiceException { requestCount.increment(); - if (request.hasServerStartCode()) { - // check that we are the same server that this RPC is intended for. - long serverStartCode = request.getServerStartCode(); - if (regionServer.serverName.getStartcode() != serverStartCode) { - throw new ServiceException(new DoNotRetryIOException("This RPC was intended for a " + - "different server with startCode: " + serverStartCode + ", this server is: " - + regionServer.serverName)); - } - } + throwOnWrongStartCode(request); OpenRegionResponse.Builder builder = OpenRegionResponse.newBuilder(); final int regionCount = request.getOpenInfoCount(); @@ -3753,15 +3763,22 @@ public class RSRpcServices implements HBaseRPCErrorHandler, checkOpen(); regionServer.getRegionServerCoprocessorHost().preExecuteProcedures(); if (request.getOpenRegionCount() > 0) { + for (OpenRegionRequest req : request.getOpenRegionList()) { + throwOnWrongStartCode(req); // No lambda, this can throw. + } // Avoid reading from the TableDescritor every time(usually it will read from the file // system) Map tdCache = new HashMap<>(); request.getOpenRegionList().forEach(req -> executeOpenRegionProcedures(req, tdCache)); } if (request.getCloseRegionCount() > 0) { + for (CloseRegionRequest req : request.getCloseRegionList()) { + throwOnWrongStartCode(req); // No lambda, this can throw. 
+ } request.getCloseRegionList().forEach(this::executeCloseRegionProcedures); } if (request.getProcCount() > 0) { + // TODO: Should these generic procedure requests also be checked against this server's start code? request.getProcList().forEach(this::executeProcedures); } regionServer.getRegionServerCoprocessorHost().postExecuteProcedures();