Subject: [PATCH] HBASE-28150
---
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java	(revision e8b2efac3fa2da7ce8c34dffbd299b068ab4c39c)
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java	(date 1698930229570)
@@ -56,11 +56,14 @@
 import org.apache.hadoop.hbase.mob.MobConstants;
 import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
+import org.apache.hadoop.hbase.util.Threads;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -80,6 +83,8 @@
   private TableName tableName;
   private TableDescriptor tableDescriptor;
 
+  private RetryCounter retryCounter;
+
   public DeleteTableProcedure() {
     // Required by the Procedure framework to create the procedure on replay
     super();
@@ -162,7 +167,13 @@
       if (isRollbackSupported(state)) {
         setFailure("master-delete-table", e);
       } else {
-        LOG.warn("Retriable error trying to delete table=" + getTableName() + " state=" + state, e);
+        if (retryCounter == null) {
+          retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+        }
+        long sleepTime = retryCounter.getBackoffTimeAndIncrementAttempts();
+        LOG.warn("Retriable error trying to delete table=" + getTableName() + " state=" + state
+          + ", Will sleep " + sleepTime / 1000 + " secs and try again later", e);
+        Threads.sleep(sleepTime);
       }
     }
     return Flow.HAS_MORE_STATE;
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java	(revision e8b2efac3fa2da7ce8c34dffbd299b068ab4c39c)
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java	(date 1698998493177)
@@ -52,6 +52,7 @@
 import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
 import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
 import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
@@ -60,7 +61,9 @@
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
+import org.apache.hadoop.hbase.util.Threads;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -79,6 +82,8 @@
   private TableDescriptor tableDescriptor;
   private List<RegionInfo> newRegions;
 
+  private RetryCounter retryCounter;
+
   public CreateTableProcedure() {
     // Required by the Procedure framework to create the procedure on replay
     super();
@@ -156,7 +161,13 @@
       if (isRollbackSupported(state)) {
         setFailure("master-create-table", e);
       } else {
-        LOG.warn("Retriable error trying to create table=" + getTableName() + " state=" + state, e);
+        if (retryCounter == null) {
+          retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+        }
+        long sleepTime = retryCounter.getBackoffTimeAndIncrementAttempts();
+        LOG.warn("Retriable error trying to create table=" + getTableName() + " state=" + state
+          + ", Will sleep " + sleepTime / 1000 + " secs and try again later", e);
+        Threads.sleep(sleepTime);
       }
     }
     return Flow.HAS_MORE_STATE;
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedureRetry.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedureRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedureRetry.java
new file mode 100644
--- /dev/null	(date 1699001303134)
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestCreateTableProcedureRetry.java	(date 1699001303134)
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.coprocessor.BadMasterObserver;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestCreateTableProcedureRetry extends TestCreateTableProcedure {
+
+  private static void setupConf(Configuration conf) {
+    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, BadMasterObserver.class.getName());
+
+    // create table will retry 3 times.
+    conf.setInt("hbase.badmasterobserver.postcreateaction.failed.times", 3);
+  }
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    setupConf(UTIL.getConfiguration());
+    UTIL.startMiniCluster(1);
+  }
+}
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserver.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserver.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserver.java
new file mode 100644
--- /dev/null	(date 1699001280664)
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/BadMasterObserver.java	(date 1699001280664)
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor;
+
+import java.io.IOException;
+import java.util.Optional;
+import org.apache.hadoop.hbase.CoprocessorEnvironment;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A MasterObserver for testing that can throw exceptions to test table DDL procedures.
+ */
+public class BadMasterObserver implements MasterObserver, MasterCoprocessor {
+  private static final Logger LOG = LoggerFactory.getLogger(BadMasterObserver.class);
+
+  private int throwExceptionInPostCreateTimes = 0;
+  private int throwExceptionInPostDeleteTimes = 0;
+
+  @Override
+  public void start(CoprocessorEnvironment env) throws IOException {
+    throwExceptionInPostCreateTimes =
+      env.getConfiguration().getInt("hbase.badmasterobserver.postcreateaction.failed.times", 0);
+    throwExceptionInPostDeleteTimes =
+      env.getConfiguration().getInt("hbase.badmasterobserver.postdeleteaction.failed.times", 0);
+  }
+
+  @Override
+  public void postCompletedCreateTableAction(ObserverContext<MasterCoprocessorEnvironment> ctx,
+    TableDescriptor desc, RegionInfo[] regions) throws IOException {
+    if (throwExceptionInPostCreateTimes > 0 && !desc.getTableName().isSystemTable()) {
+      LOG.info("throw exception execute postCreateTable, times={}",
+        throwExceptionInPostCreateTimes);
+      throwExceptionInPostCreateTimes--;
+      throw new IOException("throw exception for test.");
+    }
+  }
+
+  @Override
+  public void postCompletedDeleteTableAction(ObserverContext<MasterCoprocessorEnvironment> ctx,
+    TableName tableName) throws IOException {
+    if (throwExceptionInPostDeleteTimes > 0 && !tableName.isSystemTable()) {
+      LOG.info("throw exception execute postDeleteTable, times={}",
+        throwExceptionInPostDeleteTimes);
+      throwExceptionInPostDeleteTimes--;
+      throw new IOException("throw exception for test.");
+    }
+  }
+
+  @Override
+  public Optional<MasterObserver> getMasterObserver() {
+    return Optional.of(this);
+  }
+}
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureRetry.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureRetry.java
new file mode 100644
--- /dev/null	(date 1699001310674)
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureRetry.java	(date 1699001310674)
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.coprocessor.BadMasterObserver;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.BeforeClass;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestDeleteTableProcedureRetry extends TestDeleteTableProcedure {
+
+  private static void setupConf(Configuration conf) {
+    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
+    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, BadMasterObserver.class.getName());
+
+    // delete table will retry 3 times.
+    conf.setInt("hbase.badmasterobserver.postdeleteaction.failed.times", 3);
+  }
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    setupConf(UTIL.getConfiguration());
+    UTIL.startMiniCluster(1);
+  }
+}