From 3bb063ebaa4fc58c3e8692c1a97903c713cf7784 Mon Sep 17 00:00:00 2001
From: Vladimir Rodionov
Date: Wed, 14 Mar 2018 17:35:12 -0700
Subject: [PATCH] HBASE-19441: Implement retry logic around starting exclusive backup operation

---
 .../hadoop/hbase/backup/impl/BackupManager.java    | 42 ++++++++++++++++++++--
 .../hbase/backup/impl/BackupSystemTable.java       |  5 ++++-
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
index f09d6d06ac..55e33f38c8 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
@@ -59,6 +59,11 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti
  */
 @InterfaceAudience.Private
 public class BackupManager implements Closeable {
+  /** Configuration key for the maximum wait, in seconds, to start an exclusive operation. */
+  public static final String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_KEY =
+      "hbase.backup.exclusive.op.timeout";
+  /** Default value, in seconds, used when the timeout key is not set. */
+  private static final int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
   private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);
 
   protected Configuration conf = null;
@@ -369,7 +374,40 @@ public class BackupManager implements Closeable {
-   * @throws IOException if active session already exists
+   * @throws IOException if the exclusive lock is not acquired before the timeout expires,
+   *           the wait is interrupted, or the underlying system table operation fails
    */
   public void startBackupSession() throws IOException {
-    systemTable.startBackupExclusiveOperation();
+    long startTime = System.currentTimeMillis();
+    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_KEY,
+      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
+    long lastWarningOutputTime = 0;
+    // Keep retrying until the lock is acquired or the timeout elapses
+    while (System.currentTimeMillis() - startTime < timeout) {
+      try {
+        systemTable.startBackupExclusiveOperation();
+        return;
+      } catch (IOException e) {
+        // Only a held lock is retried; any other failure goes to the caller
+        if (!BackupSystemTable.ACTIVE_EXCLUSIVE_OPERATION_ON.equals(e.getMessage())) {
+          throw e;
+        }
+      }
+      // sleep, then repeat
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e1) {
+        // Restore the interrupt flag and give up rather than keep waiting
+        Thread.currentThread().interrupt();
+        throw new IOException(
+            "Interrupted while waiting to acquire backup exclusive lock", e1);
+      }
+      if (lastWarningOutputTime == 0
+          || (System.currentTimeMillis() - lastWarningOutputTime) > 60000) {
+        lastWarningOutputTime = System.currentTimeMillis();
+        LOG.warn("Waiting to acquire backup exclusive lock for "
+            + (lastWarningOutputTime - startTime) / 1000 + "s");
+      }
+    }
+    throw new IOException(
+        "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
   }
 
   /**
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index 2d6cf267b3..d1901aa8c0 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -91,8 +91,11 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
  */
 @InterfaceAudience.Private
 public final class BackupSystemTable implements Closeable {
+  /** Message of the IOException thrown when another exclusive operation holds the lock. */
+  public static final String ACTIVE_EXCLUSIVE_OPERATION_ON =
+      "There is an active backup exclusive operation";
   private static final Logger LOG = LoggerFactory.getLogger(BackupSystemTable.class);
 
   static class WALItem {
     String backupId;
     String walFile;
@@ -596,7 +599,7 @@ public final class BackupSystemTable implements Closeable {
         // Row exists, try to put if value == ACTIVE_SESSION_NO
         if (!table.checkAndMutate(ACTIVE_SESSION_ROW, SESSIONS_FAMILY).qualifier(ACTIVE_SESSION_COL)
             .ifEquals(ACTIVE_SESSION_NO).thenPut(put)) {
-          throw new IOException("There is an active backup exclusive operation");
+          throw new IOException(ACTIVE_EXCLUSIVE_OPERATION_ON);
         }
       }
     }
-- 
2.11.0 (Apple Git-81)