diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
index 5794fce..cc5cc95 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
@@ -134,6 +134,8 @@ public class BackupDriver extends AbstractHBaseTool {
         return -1;
       }
       throw e;
+    } finally {
+      command.finish();
     }
     return 0;
   }
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
index d1ab246..80f022f 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
@@ -45,8 +45,6 @@ public interface BackupRestoreConstants {
   public static final String BACKUP_ATTEMPTS_PAUSE_MS_KEY = "hbase.backup.attempts.pause.ms";
   public static final int DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS = 10000;
 
-
-
   /*
    * Drivers option list
    */
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
index eb60860..8454f1b 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
@@ -530,11 +530,20 @@ public class BackupAdminImpl implements BackupAdmin {
             withTotalTasks(request.getTotalTasks()).
             withBandwidthPerTasks((int)request.getBandwidth()).build();
 
+      // The client constructor starts a new backup session and fails
+      // if another session is already in progress
+      TableBackupClient client = null;
+      try {
         if (type == BackupType.FULL) {
-          new FullTableBackupClient(conn, backupId, request).execute();
+          client = new FullTableBackupClient(conn, backupId, request);
         } else {
-          new IncrementalTableBackupClient(conn, backupId, request).execute();
+          client = new IncrementalTableBackupClient(conn, backupId, request);
+        }
+      } catch (IOException e) {
+        LOG.error("There is an active session already running");
+        throw e;
       }
+
+      client.execute();
+
       return backupId;
     }
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
index 75e0ab7..211a706 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
@@ -53,8 +53,8 @@ import org.apache.hadoop.hbase.backup.BackupRequest;
 import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
 import org.apache.hadoop.hbase.backup.BackupRestoreConstants.BackupCommand;
 import org.apache.hadoop.hbase.backup.BackupType;
-import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.backup.util.BackupSet;
+import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
@@ -114,9 +114,12 @@ public final class BackupCommands {
 
   public static abstract class Command extends Configured {
     CommandLine cmdline;
-
+    Connection conn;
     Command(Configuration conf) {
-      super(conf);
+      if (conf == null) {
+        conf = HBaseConfiguration.create();
+      }
+      setConf(conf);
     }
 
     public void execute() throws IOException {
@@ -124,9 +127,40 @@ public final class BackupCommands {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+
+      // Create connection
+      conn = ConnectionFactory.createConnection(getConf());
+      if (requiresNoActiveSession()) {
+        // Check for an active (RUNNING) backup session
+        try (BackupSystemTable table = new BackupSystemTable(conn);) {
+          List<BackupInfo> sessions = table.getBackupInfos(BackupState.RUNNING);
+
+          if (sessions.size() > 0) {
+            System.err.println("Found backup session in a RUNNING state: ");
+            System.err.println(sessions.get(0));
+            System.err.println("This may indicate that a previous session has failed abnormally.");
+            System.err.println("In this case, backup recovery is recommended.");
+            throw new IOException("Active session found, aborted command execution");
+          }
+        }
+      }
+    }
+
+    public void finish() throws IOException {
+      if (conn != null) {
+        conn.close();
+      }
     }
 
     protected abstract void printUsage();
+
+    /**
+     * Whether the command can only be run when no other backup session is active.
+     * @return true if the command requires that no active session be in progress
+     */
+    protected boolean requiresNoActiveSession() {
+      return false;
+    }
   }
 
   private BackupCommands() {
@@ -178,8 +212,12 @@ public final class BackupCommands {
     }
 
     @Override
+    protected boolean requiresNoActiveSession() {
+      return true;
+    }
+
+    @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -202,8 +240,8 @@ public final class BackupCommands {
         throw new IOException(INCORRECT_USAGE);
       }
 
+      String tables = null;
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
 
       // Check if we have both: backup set and list of tables
       if (cmdline.hasOption(OPTION_TABLE) && cmdline.hasOption(OPTION_SET)) {
@@ -212,12 +250,13 @@ public final class BackupCommands {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
-
+      // Creates connection
+      super.execute();
       // Check backup set
       String setName = null;
       if (cmdline.hasOption(OPTION_SET)) {
         setName = cmdline.getOptionValue(OPTION_SET);
-        tables = getTablesForSet(setName, conf);
+        tables = getTablesForSet(setName, getConf());
 
         if (tables == null) {
           System.out.println("ERROR: Backup set '" + setName
@@ -235,8 +274,7 @@ public final class BackupCommands {
           cmdline.hasOption(OPTION_WORKERS) ? Integer.parseInt(cmdline
               .getOptionValue(OPTION_WORKERS)) : -1;
 
-      try (Connection conn = ConnectionFactory.createConnection(getConf());
-          BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         BackupRequest.Builder builder = new BackupRequest.Builder();
         BackupRequest request =
             builder.withBackupType(BackupType.valueOf(args[1].toUpperCase()))
@@ -268,8 +306,7 @@ public final class BackupCommands {
     }
 
     private String getTablesForSet(String name, Configuration conf) throws IOException {
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable table = new BackupSystemTable(conn)) {
+      try (final BackupSystemTable table = new BackupSystemTable(conn)) {
        List<TableName> tables = table.describeBackupSet(name);
         if (tables == null) return null;
         return StringUtils.join(tables, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND);
@@ -304,7 +341,6 @@ public final class BackupCommands {
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -359,7 +395,6 @@ public final class BackupCommands {
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -370,10 +405,10 @@ public final class BackupCommands {
         throw new IOException(INCORRECT_USAGE);
       }
 
+      super.execute();
+
       String backupId = args[1];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+      try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
         BackupInfo info = sysTable.readBackupInfo(backupId);
         if (info == null) {
           System.out.println("ERROR: " + backupId + " does not exist");
@@ -399,7 +434,6 @@ public final class BackupCommands {
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null
           || cmdline.getArgs().length == 1) {
         System.out.println("No backup id was specified, "
@@ -412,10 +446,10 @@ public final class BackupCommands {
         throw new IOException(INCORRECT_USAGE);
       }
 
+      super.execute();
+
       String backupId = (args == null || args.length <= 1) ? null : args[1];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+      try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
         BackupInfo info = null;
 
         if (backupId != null) {
@@ -456,19 +490,23 @@ public final class BackupCommands {
     }
 
     @Override
+    protected boolean requiresNoActiveSession() {
+      return true;
+    }
+
+    @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null || cmdline.getArgs().length < 2) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
 
+      super.execute();
+
       String[] args = cmdline.getArgs();
       String[] backupIds = new String[args.length - 1];
       System.arraycopy(args, 1, backupIds, 0, backupIds.length);
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         int deleted = admin.deleteBackups(backupIds);
         System.out.println("Deleted " + deleted + " backups. Total requested: " + args.length);
       }
@@ -512,7 +550,6 @@ public final class BackupCommands {
 
     @Override
     public void execute() throws IOException {
-      super.execute();
 
       int n = parseHistoryLength();
       final TableName tableName = getTableName();
@@ -535,18 +572,16 @@ public final class BackupCommands {
       };
       Path backupRootPath = getBackupRootPath();
       List<BackupInfo> history = null;
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
       if (backupRootPath == null) {
         // Load from backup system table
-        try (final Connection conn = ConnectionFactory.createConnection(conf);
-            final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
-
+        super.execute();
+        try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
           history = sysTable.getBackupHistory(n, tableNameFilter, tableSetFilter);
         }
       } else {
         // load from backup FS
         history =
-            BackupUtils.getHistory(conf, n, backupRootPath, tableNameFilter, tableSetFilter);
+            BackupUtils.getHistory(getConf(), n, backupRootPath, tableNameFilter, tableSetFilter);
       }
       for (BackupInfo info : history) {
         System.out.println(info.getShortDescription());
@@ -627,7 +662,6 @@ public final class BackupCommands {
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       // Command-line must have at least one element
       if (cmdline == null || cmdline.getArgs() == null || cmdline.getArgs().length < 2) {
         printUsage();
@@ -661,11 +695,11 @@ public final class BackupCommands {
     }
 
     private void processSetList(String[] args) throws IOException {
+      super.execute();
+
       // List all backup set names
       // does not expect any args
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         List<BackupSet> list = admin.listBackupSets();
         for (BackupSet bs : list) {
           System.out.println(bs);
@@ -678,10 +712,10 @@ public final class BackupCommands {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
+
       String setName = args[2];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+      try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
         List<TableName> tables = sysTable.describeBackupSet(setName);
         BackupSet set = tables == null ? null : new BackupSet(setName, tables);
         if (set == null) {
@@ -697,10 +731,10 @@ public final class BackupCommands {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
+
       String setName = args[2];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         boolean result = admin.deleteBackupSet(setName);
         if (result) {
           System.out.println("Delete set " + setName + " OK.");
@@ -715,13 +749,12 @@ public final class BackupCommands {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
 
       String setName = args[2];
       String[] tables = args[3].split(",");
       TableName[] tableNames = toTableNames(tables);
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         admin.removeFromBackupSet(setName, tableNames);
       }
     }
@@ -739,15 +772,15 @@ public final class BackupCommands {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
+
       String setName = args[2];
       String[] tables = args[3].split(",");
       TableName[] tableNames = new TableName[tables.length];
       for (int i = 0; i < tables.length; i++) {
         tableNames[i] = TableName.valueOf(tables[i]);
       }
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         admin.addToBackupSet(setName, tableNames);
       }
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
index f09310f..a929700 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java
@@ -367,6 +367,23 @@ public class BackupManager implements Closeable {
   }
 
   /**
+   * Starts a new backup session
+   * @throws IOException if an active backup session already exists
+   */
+  public void startBackupSession() throws IOException {
+    systemTable.startBackupSession();
+  }
+
+  /**
+   * Finishes the active backup session
+   * @throws IOException if there is no active backup session
+   */
+  public void finishBackupSession() throws IOException {
+    systemTable.finishBackupSession();
+  }
+
+  /**
    * Read the last backup start code (timestamp) of last successful backup. Will return null if
    * there is no startcode stored in backup system table or the value is of length 0. These two
    * cases indicate there is no successful backup completed so far.
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index 217e750..022319b 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -133,6 +133,12 @@ public final class BackupSystemTable implements Closeable {
   private final static String BACKUP_INFO_PREFIX = "session:";
   private final static String START_CODE_ROW = "startcode:";
+  private final static byte[] ACTIVE_SESSION_ROW = "activesession:".getBytes();
+  private final static byte[] ACTIVE_SESSION_COL = "c".getBytes();
+
+  private final static byte[] ACTIVE_SESSION_YES = "yes".getBytes();
+  private final static byte[] ACTIVE_SESSION_NO = "no".getBytes();
+
   private final static String INCR_BACKUP_SET = "incrbackupset:";
   private final static String TABLE_RS_LOG_MAP_PREFIX = "trslm:";
   private final static String RS_LOG_TS_PREFIX = "rslogts:";
@@ -555,6 +561,41 @@ public final class BackupSystemTable implements Closeable {
     }
   }
 
+  public void startBackupSession() throws IOException {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Start new backup session");
+    }
+    try (Table table = connection.getTable(tableName)) {
+      Put put = createPutForStartBackupSession();
+      // Atomically flip the active session marker from "no" to "yes"
+      boolean started = table.checkAndPut(ACTIVE_SESSION_ROW, SESSIONS_FAMILY,
+        ACTIVE_SESSION_COL, ACTIVE_SESSION_NO, put);
+      if (!started) {
+        // The marker cell does not exist yet before the very first session
+        started = table.checkAndPut(ACTIVE_SESSION_ROW, SESSIONS_FAMILY,
+          ACTIVE_SESSION_COL, null, put);
+      }
+      if (!started) {
+        throw new IOException("Failed to start backup session: an active session already exists");
+      }
+    }
+  }
+
+  private Put createPutForStartBackupSession() {
+    Put put = new Put(ACTIVE_SESSION_ROW);
+    put.addColumn(SESSIONS_FAMILY, ACTIVE_SESSION_COL, ACTIVE_SESSION_YES);
+    return put;
+  }
+
+  public void finishBackupSession() throws IOException {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Stop backup session");
+    }
+    try (Table table = connection.getTable(tableName)) {
+      Put put = createPutForStopBackupSession();
+      // Flip the marker back from "yes" to "no"
+      boolean finished = table.checkAndPut(ACTIVE_SESSION_ROW, SESSIONS_FAMILY,
+        ACTIVE_SESSION_COL, ACTIVE_SESSION_YES, put);
+      if (!finished) {
+        throw new IOException("Failed to finish backup session: no active session found");
+      }
+    }
+  }
+
+  private Put createPutForStopBackupSession() {
+    Put put = new Put(ACTIVE_SESSION_ROW);
+    put.addColumn(SESSIONS_FAMILY, ACTIVE_SESSION_COL, ACTIVE_SESSION_NO);
+    return put;
+  }
+
   /**
    * Get the Region Servers log information after the last log roll from backup system table.
   * @param backupRoot root directory path to backup
@@ -1302,9 +1343,9 @@ public final class BackupSystemTable implements Closeable {
     return getTableName(conf).getNameAsString();
   }
 
-
-
+  public static String getSnapshotName(Configuration conf) {
+    return "snapshot_" + getTableNameAsString(conf).replace(":", "_");
+  }
 
   /**
    * Creates Put operation for a given backup info object
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
index ee7a841..f4e2f22 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Full table backup implementation
  *
@@ -108,11 +110,105 @@ public class FullTableBackupClient extends TableBackupClient {
    */
   @Override
   public void execute() throws IOException {
+    if (isInTestMode()) {
+      executeForTesting();
+      return;
+    }
+
+    try (Admin admin = conn.getAdmin();) {
+
+      // Begin BACKUP
+      beginBackup(backupManager, backupInfo);
+      String savedStartCode = null;
+      boolean firstBackup = false;
+      // do snapshot for full table backup
+
+      savedStartCode = backupManager.readBackupStartCode();
+      firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) == 0L;
+      if (firstBackup) {
+        // This is our first backup. Let's put some marker to system table so that we can hold
+        // the logs while we do the backup.
+        backupManager.writeBackupStartCode(0L);
+      }
+      // We roll log here before we do the snapshot. It is possible there is duplicate data
+      // in the log that is already in the snapshot. But if we do it after the snapshot, we
+      // could have data loss.
+      // A better approach is to do the roll log on each RS in the same global procedure as
+      // the snapshot.
+      LOG.info("Execute roll log procedure for full backup ...");
+
+      Map<String, String> props = new HashMap<String, String>();
+      props.put("backupRoot", backupInfo.getBackupRootDir());
+      admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
+        LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
+
+      newTimestamps = backupManager.readRegionServerLastLogRollResult();
+      if (firstBackup) {
+        // Updates registered log files
+        // We record ALL old WAL files as registered, because
+        // this is a first full backup in the system and these
+        // files are not needed for next incremental backup
+        List<String> logFiles = BackupUtils.getWALFilesOlderThan(conf, newTimestamps);
+        backupManager.recordWALFiles(logFiles);
+      }
+
+      // SNAPSHOT_TABLES:
+      backupInfo.setPhase(BackupPhase.SNAPSHOT);
+      for (TableName tableName : tableList) {
+        String snapshotName =
+            "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime()) + "_"
+                + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString();
+
+        snapshotTable(admin, tableName, snapshotName);
+        backupInfo.setSnapshotName(tableName, snapshotName);
+      }
+
+      // SNAPSHOT_COPY:
+      // do snapshot copy
+      LOG.debug("snapshot copy for " + backupId);
+      snapshotCopy(backupInfo);
+      // Updates incremental backup table set
+      backupManager.addIncrementalBackupTableSet(backupInfo.getTables());
+
+      // BACKUP_COMPLETE:
+      // set overall backup status: complete. Here we make sure to complete the backup.
+      // After this checkpoint, even if entering cancel process, will let the backup finish
+      backupInfo.setState(BackupState.COMPLETE);
+      // The table list in backupInfo is good for both full backup and incremental backup.
+      // For incremental backup, it contains the incremental backup table set.
+      backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+
+      HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
+          backupManager.readLogTimestampMap();
+
+      Long newStartCode =
+          BackupUtils.getMinValue(BackupUtils
+              .getRSLogTimestampMins(newTableSetTimestampMap));
+      backupManager.writeBackupStartCode(newStartCode);
+
+      // backup complete
+      completeBackup(conn, backupInfo, backupManager, BackupType.FULL, conf);
+    } catch (Exception e) {
+      failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ",
+        BackupType.FULL, conf);
+      throw new IOException(e);
+    }
+  }
+
+  @Override
+  @VisibleForTesting
+  protected void executeForTesting() throws IOException {
+    // Get the stage ID to fail on
     try (Admin admin = conn.getAdmin();) {
       // Begin BACKUP
       beginBackup(backupManager, backupInfo);
+
+      failStageIf(0);
+
       String savedStartCode = null;
       boolean firstBackup = false;
       // do snapshot for full table backup
@@ -124,6 +220,8 @@ public class FullTableBackupClient extends TableBackupClient {
         // while we do the backup.
         backupManager.writeBackupStartCode(0L);
       }
+      failStageIf(1);
+
       // We roll log here before we do the snapshot. It is possible there is duplicate data
       // in the log that is already in the snapshot. But if we do it after the snapshot, we
       // could have data loss.
@@ -136,6 +234,8 @@ public class FullTableBackupClient extends TableBackupClient {
       admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
         LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
 
+      failStageIf(2);
+
       newTimestamps = backupManager.readRegionServerLastLogRollResult();
       if (firstBackup) {
         // Updates registered log files
@@ -156,6 +256,7 @@ public class FullTableBackupClient extends TableBackupClient {
         snapshotTable(admin, tableName, snapshotName);
         backupInfo.setSnapshotName(tableName, snapshotName);
       }
+      failStageIf(3);
 
       // SNAPSHOT_COPY:
       // do snapshot copy
@@ -179,9 +280,11 @@ public class FullTableBackupClient extends TableBackupClient {
           BackupUtils.getMinValue(BackupUtils
              .getRSLogTimestampMins(newTableSetTimestampMap));
       backupManager.writeBackupStartCode(newStartCode);
+      failStageIf(4);
 
       // backup complete
       completeBackup(conn, backupInfo, backupManager, BackupType.FULL, conf);
+
     } catch (Exception e) {
       failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ",
         BackupType.FULL, conf);
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index 3003c93..8908055 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -55,6 +55,8 @@ import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.util.Tool;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Incremental backup implementation.
  * See the {@link #execute() execute} method.
@@ -207,18 +209,24 @@ public class IncrementalTableBackupClient extends TableBackupClient {
 
   @Override
   public void execute() throws IOException {
-    // case PREPARE_INCREMENTAL:
-    beginBackup(backupManager, backupInfo);
-    backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
-    LOG.debug("For incremental backup, current table set is "
-        + backupManager.getIncrementalBackupTableSet());
+    if (isInTestMode()) {
+      executeForTesting();
+      return;
+    }
+
+    try {
+      // case PREPARE_INCREMENTAL:
+      beginBackup(backupManager, backupInfo);
+      backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
+      LOG.debug("For incremental backup, current table set is "
+          + backupManager.getIncrementalBackupTableSet());
       newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
     } catch (Exception e) {
       // fail the overall backup and return
       failBackup(conn, backupInfo, backupManager, e, "Unexpected Exception : ",
         BackupType.INCREMENTAL, conf);
+      return;
     }
 
     // case INCREMENTAL_COPY:
@@ -267,6 +275,67 @@ public class IncrementalTableBackupClient extends TableBackupClient {
     }
   }
 
+  @Override
+  @VisibleForTesting
+  protected void executeForTesting() throws IOException {
+
+    // case PREPARE_INCREMENTAL:
+    beginBackup(backupManager, backupInfo);
+
+    failStageIf(0);
+
+    backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
+    LOG.debug("For incremental backup, current table set is "
+        + backupManager.getIncrementalBackupTableSet());
+    try {
+      newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
+
+      failStageIf(1);
+
+      // case INCREMENTAL_COPY:
+      // copy out the table and region info files for each table
+      BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
+      // convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
+      convertWALsToHFiles(backupInfo);
+      incrementalCopyHFiles(backupInfo);
+      // Save list of WAL files copied
+      backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
+
+      failStageIf(2);
+
+      // case INCR_BACKUP_COMPLETE:
+      // set overall backup status: complete. Here we make sure to complete the backup.
+      // After this checkpoint, even if entering cancel process, will let the backup finish
+      backupInfo.setState(BackupState.COMPLETE);
+      // Set the previousTimestampMap which is before this current log roll to the manifest.
+      HashMap<TableName, HashMap<String, Long>> previousTimestampMap =
+          backupManager.readLogTimestampMap();
+      backupInfo.setIncrTimestampMap(previousTimestampMap);
+
+      // The table list in backupInfo is good for both full backup and incremental backup.
+      // For incremental backup, it contains the incremental backup table set.
+      backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+
+      failStageIf(3);
+
+      HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
+          backupManager.readLogTimestampMap();
+
+      Long newStartCode =
+          BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
+      backupManager.writeBackupStartCode(newStartCode);
+
+      handleBulkLoad(backupInfo.getTableNames());
+      // backup complete
+      completeBackup(conn, backupInfo, backupManager, BackupType.INCREMENTAL, conf);
+
+    } catch (Exception e) {
+      failBackup(conn, backupInfo, backupManager, e, "Unexpected Exception : ",
+        BackupType.INCREMENTAL, conf);
+    }
+  }
+
   private void incrementalCopyHFiles(BackupInfo backupInfo) throws Exception {
     try {
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
index 125b5da..20cd5ce 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
@@ -43,6 +43,8 @@ import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Base class for backup operation. Concrete implementation for
  * full and incremental backup are delegated to corresponding sub-classes:
@@ -51,6 +53,10 @@ import org.apache.hadoop.hbase.util.FSUtils;
  */
 @InterfaceAudience.Private
 public abstract class TableBackupClient {
+
+  public static final String BACKUP_TEST_MODE = "backup.test.mode";
+  public static final String BACKUP_TEST_MODE_STAGE = "backup.test.mode.stage";
+
   private static final Log LOG = LogFactory.getLog(TableBackupClient.class);
 
   protected Configuration conf;
@@ -79,6 +85,8 @@ public abstract class TableBackupClient {
     if (tableList == null || tableList.isEmpty()) {
       this.tableList = new ArrayList<>(backupInfo.getTables());
     }
+    // Start new session
+    backupManager.startBackupSession();
   }
 
   /**
@@ -88,6 +96,8 @@ public abstract class TableBackupClient {
    */
   protected void beginBackup(BackupManager backupManager, BackupInfo backupInfo)
       throws IOException {
+    // Snapshot the backup system table so that it can be restored on failure
+    snapshotBackupTable();
     backupManager.setBackupInfo(backupInfo);
     // set the start timestamp of the overall backup
     long startTs = EnvironmentEdgeManager.currentTime();
@@ -116,7 +126,7 @@ public abstract class TableBackupClient {
    * @param backupInfo backup info
    * @throws Exception exception
    */
-  private void deleteSnapshot(final Connection conn, BackupInfo backupInfo, Configuration conf)
+  private static void deleteSnapshots(final Connection conn, BackupInfo backupInfo, Configuration conf)
       throws IOException {
     LOG.debug("Trying to delete snapshot for full backup.");
     for (String snapshotName : backupInfo.getSnapshotNames()) {
@@ -127,8 +137,6 @@
 
       try (Admin admin = conn.getAdmin();) {
         admin.deleteSnapshot(snapshotName);
-      } catch (IOException ioe) {
-        LOG.debug("when deleting snapshot " + snapshotName, ioe);
       }
       LOG.debug("Deleting the snapshot " + snapshotName + " for backup "
          + backupInfo.getBackupId() + " succeeded.");
@@ -140,7 +148,7 @@
    * snapshots.
    * @throws IOException exception
    */
-  private void cleanupExportSnapshotLog(Configuration conf) throws IOException {
+  private static void cleanupExportSnapshotLog(Configuration conf) throws IOException {
     FileSystem fs = FSUtils.getCurrentFileSystem(conf);
     Path stagingDir =
         new Path(conf.get(BackupRestoreConstants.CONF_STAGING_ROOT, fs.getWorkingDirectory()
@@ -163,7 +171,7 @@
    * Clean up the uncompleted data at target directory if the ongoing backup has already entered
    * the copy phase.
    */
-  private void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
+  private static void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
     try {
       // clean up the uncompleted data at target directory if the ongoing backup has already entered
       // the copy phase
@@ -182,10 +190,10 @@
             new Path(HBackupFileSystem.getTableBackupDir(backupInfo.getBackupRootDir(),
               backupInfo.getBackupId(), table));
         if (outputFs.delete(targetDirPath, true)) {
-          LOG.info("Cleaning up uncompleted backup data at " + targetDirPath.toString()
+          LOG.debug("Cleaning up uncompleted backup data at " + targetDirPath.toString()
              + " done.");
         } else {
-          LOG.info("No data has been copied to " + targetDirPath.toString() + ".");
+          LOG.debug("No data has been copied to " + targetDirPath.toString() + ".");
         }
 
         Path tableDir = targetDirPath.getParent();
@@ -229,21 +237,64 @@
         + ",failedts=" + backupInfo.getCompleteTs() + ",failedphase=" + backupInfo.getPhase()
         + ",failedmessage=" + backupInfo.getFailedMsg();
     LOG.error(backupFailedData);
-
+    cleanupAndRestoreBackupSystem(conn, backupInfo, conf);
+    // If the backup session has been updated to FAILED state,
+    // recovery has already been processed.
     backupManager.updateBackupInfo(backupInfo);
+    backupManager.finishBackupSession();
+    LOG.error("Backup " + backupInfo.getBackupId() + " failed.");
+  }
+
+  public static void cleanupAndRestoreBackupSystem(Connection conn, BackupInfo backupInfo,
+      Configuration conf) throws IOException {
+    BackupType type = backupInfo.getType();
+    // if full backup, then delete HBase snapshots if there already are snapshots taken
+    // and also clean up export snapshot log files if exist
+    if (type == BackupType.FULL) {
+      deleteSnapshots(conn, backupInfo, conf);
+      cleanupExportSnapshotLog(conf);
+    }
+    restoreBackupTable(conn, conf);
+    deleteBackupTableSnapshot(conn, conf);
+    // clean up the uncompleted data at target directory if the ongoing backup has already entered
+    // the copy phase
+    // For incremental backup, DistCp logs will be cleaned with the targetDir.
+    cleanupTargetDir(backupInfo, conf);
+  }
+
+  private void snapshotBackupTable() throws IOException {
+    try (Admin admin = conn.getAdmin();) {
+      admin.snapshot(BackupSystemTable.getSnapshotName(conf),
+        BackupSystemTable.getTableName(conf));
+    }
+  }
+
+  private static void restoreBackupTable(Connection conn, Configuration conf)
+      throws IOException {
+    LOG.debug("Restoring " + BackupSystemTable.getTableNameAsString(conf)
+        + " from snapshot");
+    try (Admin admin = conn.getAdmin();) {
+      String snapshotName = BackupSystemTable.getSnapshotName(conf);
+      admin.disableTable(BackupSystemTable.getTableName(conf));
+      admin.restoreSnapshot(snapshotName);
+      admin.enableTable(BackupSystemTable.getTableName(conf));
-    // if full backup, then delete HBase snapshots if there already are snapshots taken
-    // and also clean up export snapshot log files if exist
-    if (type == BackupType.FULL) {
-      deleteSnapshot(conn, backupInfo, conf);
-      cleanupExportSnapshotLog(conf);
     }
+    LOG.debug("Done restoring backup system table");
+  }
 
-    // clean up the uncompleted data at target directory if the ongoing backup has already entered
-    // the copy phase
-    // For incremental backup, DistCp logs will be cleaned with the targetDir.
-    cleanupTargetDir(backupInfo, conf);
-    LOG.info("Backup " + backupInfo.getBackupId() + " failed.");
+  private static void deleteBackupTableSnapshot(Connection conn, Configuration conf)
+      throws IOException {
+    LOG.debug("Deleting " + BackupSystemTable.getSnapshotName(conf)
+        + " from the system");
+    try (Admin admin = conn.getAdmin();) {
+      String snapshotName = BackupSystemTable.getSnapshotName(conf);
+      admin.deleteSnapshot(snapshotName);
+    }
+    LOG.debug("Done deleting backup system table snapshot");
   }
 
   /**
@@ -366,11 +417,15 @@
     // - clean up directories with prefix "exportSnapshot-", which are generated when exporting
     // snapshots
     if (type == BackupType.FULL) {
-      deleteSnapshot(conn, backupInfo, conf);
+      deleteSnapshots(conn, backupInfo, conf);
       cleanupExportSnapshotLog(conf);
     } else if (type == BackupType.INCREMENTAL) {
       cleanupDistCpLog(backupInfo, conf);
     }
+    deleteBackupTableSnapshot(conn, conf);
+    // Finish active session
+    backupManager.finishBackupSession();
+
     LOG.info("Backup " + backupInfo.getBackupId() + " completed.");
   }
 
@@ -380,4 +435,28 @@
    */
   public abstract void execute() throws IOException;
 
+  @VisibleForTesting
+  protected abstract void executeForTesting() throws IOException;
+
+  @VisibleForTesting
+  protected boolean isInTestMode() {
+    return conf.getBoolean(BACKUP_TEST_MODE, false);
+  }
+
+  @VisibleForTesting
+  protected int getTestStageId() {
+    return conf.getInt(BACKUP_TEST_MODE_STAGE, 0);
+  }
+
+  @VisibleForTesting
+  protected void failStageIf(int stage) throws IOException {
+    int current = getTestStageId();
+    if (current == stage) {
+      throw new IOException("Failed stage " + stage + " in testing");
+    }
+  }
 }
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
new file mode 100644
index 0000000..9efe09f
--- /dev/null
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(LargeTests.class)
+public class TestFullBackupWithFailures extends TestBackupBase {
+
+  private static final Log LOG = LogFactory.getLog(TestFullBackupWithFailures.class);
+
+  @Test
+  public void testFullBackupWithFailures() throws Exception {
+    conf1.setBoolean(TableBackupClient.BACKUP_TEST_MODE, true);
+    int stage = (new Random()).nextInt(5);
+    // Fail random stage between 0 and 4 inclusive
+    LOG.info("Running stage " + stage);
+    runBackupAndFailAtStage(stage);
+  }
+
+  public void runBackupAndFailAtStage(int stage) throws Exception {
+    conf1.setInt(TableBackupClient.BACKUP_TEST_MODE_STAGE, stage);
+    try (BackupSystemTable table = new BackupSystemTable(TEST_UTIL.getConnection())) {
+      int before = table.getBackupHistory().size();
+      String[] args =
+          new String[] { "create", "full", BACKUP_ROOT_DIR, "-t",
+              table1.getNameAsString() + "," + table2.getNameAsString() };
+      // Run backup
+      int ret = ToolRunner.run(conf1, new BackupDriver(), args);
+      assertFalse(ret == 0);
+      List<BackupInfo> backups = table.getBackupHistory();
+      int after = table.getBackupHistory().size();
+
+      assertTrue(after == before + 1);
+      for (BackupInfo data : backups) {
+        String backupId = data.getBackupId();
+        assertFalse(checkSucceeded(backupId));
+      }
+      Set<TableName> tables = table.getIncrementalBackupTableSet(BACKUP_ROOT_DIR);
+      assertTrue(tables.size() == 0);
+    }
+  }
+}
\ No newline at end of file