From dbda94078fea7c792b7f8a09537ef490bc9a6f4b Mon Sep 17 00:00:00 2001 From: zhangduo Date: Tue, 25 Sep 2018 21:16:00 +0800 Subject: [PATCH] HBASE-21227 Implement exponential retrying backoff for Assign/UnassignRegionHandler introduced in HBASE-21217 --- .../handler/AssignRegionHandler.java | 11 +++-- .../regionserver/handler/HandlerUtil.java | 40 +++++++++++++++++++ .../handler/UnassignRegionHandler.java | 8 ++-- 3 files changed, 52 insertions(+), 7 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/HandlerUtil.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java index bf3d4763e6..72dcce2076 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java @@ -54,6 +54,8 @@ public class AssignRegionHandler extends EventHandler { private final long masterSystemTime; + private int attempts; + public AssignRegionHandler(RegionServerServices server, RegionInfo regionInfo, @Nullable TableDescriptor tableDesc, long masterSystemTime, EventType eventType) { super(server, eventType); @@ -106,10 +108,11 @@ public class AssignRegionHandler extends EventHandler { // calling reportRegionStateTransition, so the HMaster will think the region is offline, // before we actually close the region, as reportRegionStateTransition is part of the // closing process. 
- LOG.info("Receiving OPEN for the region:{}, which we are trying to close, try again later", - regionName); - // TODO: backoff - rs.getExecutorService().delayedSubmit(this, 1, TimeUnit.SECONDS); + long backoff = HandlerUtil.getBackoffTimeMs(attempts++); + LOG.info( + "Receiving OPEN for the region:{}, which we are trying to close, try again after {}ms", + regionName, backoff); + rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS); } return; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/HandlerUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/HandlerUtil.java new file mode 100644 index 0000000000..4e6786e321 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/HandlerUtil.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.handler; + +import java.util.concurrent.ThreadLocalRandom; +import org.apache.yetus.audience.InterfaceAudience; + +@InterfaceAudience.Private +class HandlerUtil { + + /** + * Get an exponential backoff time, in milliseconds. 
The base unit is 100 milliseconds, and the + * max backoff time is 30 seconds, plus a random jitter of up to 1% when attempts is below 30. + */ + public static long getBackoffTimeMs(int attempts) { + long maxBackoffTime = 30L * 1000; // 30 seconds, hard code for now. + // avoid overflow + if (attempts >= 30) { + return maxBackoffTime; + } + long backoff = Math.min((long) (100 * Math.pow(2, attempts)), maxBackoffTime); + long jitter = (long) (ThreadLocalRandom.current().nextFloat() * backoff * 0.01f); + return backoff + jitter; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java index 2fb7393fc9..43126a4227 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java @@ -54,6 +54,8 @@ public class UnassignRegionHandler extends EventHandler { private final ServerName destination; + private int attempts; + public UnassignRegionHandler(RegionServerServices server, String encodedName, boolean abort, @Nullable ServerName destination, EventType eventType) { super(server, eventType); @@ -76,10 +78,10 @@ public class UnassignRegionHandler extends EventHandler { // This could happen as we will update the region state to OPEN when calling // reportRegionStateTransition, so the HMaster will think the region is online, before we // actually open the region, as reportRegionStateTransition is part of the opening process. + long backoff = HandlerUtil.getBackoffTimeMs(attempts++); LOG.warn("Received CLOSE for the region: {}, which we are already " + - "trying to OPEN. try again later.", encodedName); - // TODO: backoff - rs.getExecutorService().delayedSubmit(this, 1, TimeUnit.SECONDS); + "trying to OPEN. 
try again after {}ms", encodedName, backoff); + rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS); } else { LOG.info("Received CLOSE for the region: {}, which we are already trying to CLOSE," + " but not completed yet", encodedName); -- 2.17.1