From 6055d20d0e3c5f1e2e3ad6b6b300ca44505ee99a Mon Sep 17 00:00:00 2001
From: Vladimir Rodionov
Date: Mon, 19 Mar 2018 20:01:05 -0700
Subject: [PATCH] HBASE-19441: Implement retry logic around starting exclusive
 backup operation

---
 .../hadoop/hbase/backup/impl/BackupManager.java    |  36 +++++-
 .../hbase/backup/impl/BackupSystemTable.java       |   5 +-
 .../backup/impl/ExclusiveOperationException.java   |  12 ++
 .../hadoop/hbase/backup/TestBackupManager.java     | 131 +++++++++++++++++++++
 4 files changed, 181 insertions(+), 3 deletions(-)
 create mode 100644 hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/ExclusiveOperationException.java
 create mode 100644 hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupManager.java

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
index f09d6d06ac..917e2832df 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
@@ -59,6 +59,11 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti
  */
 @InterfaceAudience.Private
 public class BackupManager implements Closeable {
+  // in seconds
+  public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY =
+      "hbase.backup.exclusive.op.timeout.seconds";
+  // in seconds
+  private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
   private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);
 
   protected Configuration conf = null;
@@ -369,7 +374,36 @@ public class BackupManager implements Closeable {
    * @throws IOException if active session already exists
    */
   public void startBackupSession() throws IOException {
-    systemTable.startBackupExclusiveOperation();
+    long startTime = System.currentTimeMillis();
+    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY,
+      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
+    long lastWarningOutputTime = 0;
+    while (System.currentTimeMillis() - startTime < timeout) {
+      try {
+        systemTable.startBackupExclusiveOperation();
+        return;
+      } catch (IOException e) {
+        if (e instanceof ExclusiveOperationException) {
+          // sleep, then repeat
+          try {
+            Thread.sleep(1000);
+          } catch (InterruptedException e1) {
+            // Restore the interrupted status
+            Thread.currentThread().interrupt();
+          }
+          if (lastWarningOutputTime == 0
+              || (System.currentTimeMillis() - lastWarningOutputTime) > 60000) {
+            lastWarningOutputTime = System.currentTimeMillis();
+            LOG.warn("Waiting to acquire backup exclusive lock for "
+                + (lastWarningOutputTime - startTime) / 1000 + "s");
+          }
+        } else {
+          throw e;
+        }
+      }
+    }
+    throw new IOException(
+      "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
   }
 
   /**
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index 4a860d9857..adaa06a288 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -91,8 +91,9 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
  */
 @InterfaceAudience.Private
 public final class BackupSystemTable implements Closeable {
+  private static final Logger LOG = LoggerFactory.getLogger(BackupSystemTable.class);
-
+
   static class WALItem {
     String backupId;
     String walFile;
@@ -596,7 +597,7 @@ public final class BackupSystemTable implements Closeable {
       // Row exists, try to put if value == ACTIVE_SESSION_NO
       if (!table.checkAndMutate(ACTIVE_SESSION_ROW, SESSIONS_FAMILY).qualifier(ACTIVE_SESSION_COL)
         .ifEquals(ACTIVE_SESSION_NO).thenPut(put)) {
-        throw new IOException("There is an active backup exclusive operation");
+        throw new ExclusiveOperationException();
       }
     }
   }
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/ExclusiveOperationException.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/ExclusiveOperationException.java
new file mode 100644
index 0000000000..819f4c66a7
--- /dev/null
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/ExclusiveOperationException.java
@@ -0,0 +1,12 @@
+package org.apache.hadoop.hbase.backup.impl;
+
+import java.io.IOException;
+
+@SuppressWarnings("serial")
+public class ExclusiveOperationException extends IOException {
+
+  public ExclusiveOperationException() {
+    super();
+  }
+
+}
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupManager.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupManager.java
new file mode 100644
index 0000000000..a4daf55902
--- /dev/null
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupManager.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLongArray;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.backup.impl.BackupManager;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestBackupManager {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestBackupManager.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  protected static Configuration conf = UTIL.getConfiguration();
+  protected static MiniHBaseCluster cluster;
+  protected static Connection conn;
+  protected BackupManager backupManager;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
+    BackupManager.decorateMasterConfiguration(conf);
+    BackupManager.decorateRegionServerConfiguration(conf);
+    cluster = UTIL.startMiniCluster();
+    conn = UTIL.getConnection();
+  }
+
+  @AfterClass
+  public static void tearDown() throws IOException {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Before
+  public void before() throws IOException {
+    backupManager = new BackupManager(conn, conn.getConfiguration());
+  }
+
+  @After
+  public void after() {
+    backupManager.close();
+  }
+
+  AtomicLongArray startTimes = new AtomicLongArray(2);
+  AtomicLongArray stopTimes = new AtomicLongArray(2);
+
+  @Test
+  public void testStartBackupExclusiveOperation() {
+
+    long sleepTime = 2000;
+    Runnable r = new Runnable() {
+      public void run() {
+        try {
+          backupManager.startBackupSession();
+          boolean result = startTimes.compareAndSet(0, 0, System.currentTimeMillis());
+          if (!result) {
+            result = startTimes.compareAndSet(1, 0, System.currentTimeMillis());
+            if (!result) {
+              throw new IOException();
+            }
+          }
+          Thread.sleep(sleepTime);
+          result = stopTimes.compareAndSet(0, 0, System.currentTimeMillis());
+          if (!result) {
+            result = stopTimes.compareAndSet(1, 0, System.currentTimeMillis());
+            if (!result) {
+              throw new IOException();
+            }
+          }
+          backupManager.finishBackupSession();
+        } catch (IOException | InterruptedException e) {
+          fail("Unexpected exception: " + e.getMessage());
+        }
+      }
+    };
+
+    Thread[] workers = new Thread[2];
+    for (int i = 0; i < workers.length; i++) {
+      workers[i] = new Thread(r);
+      workers[i].start();
+    }
+
+    for (int i = 0; i < workers.length; i++) {
+      try {
+        workers[i].join();
+      } catch (InterruptedException e) {
+      }
+    }
+
+    assertTrue(startTimes.get(1) - startTimes.get(0) >= sleepTime);
+    assertTrue(stopTimes.get(1) - stopTimes.get(0) >= sleepTime);
+
+  }
+
+}
-- 
2.14.3 (Apple Git-98)
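
Usage sketch (illustrative only, not part of the patch): how a caller could shorten the retry window this change introduces around acquiring the exclusive backup lock. The config key and the BackupManager methods come from the patch above; the surrounding setup (an already open Connection conn and its Configuration conf) is assumed.

    // Wait at most 10 minutes for the exclusive lock instead of the 3600 s default.
    conf.setInt(BackupManager.BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY, 600);
    try (BackupManager manager = new BackupManager(conn, conf)) {
      // Retries once per second until the lock is free or the timeout elapses.
      manager.startBackupSession();
      try {
        // ... perform the backup work ...
      } finally {
        // Release the exclusive lock so other sessions can start.
        manager.finishBackupSession();
      }
    }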