diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
index d1ab246..80f022f 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
@@ -45,8 +45,6 @@ public interface BackupRestoreConstants {
   public static final String BACKUP_ATTEMPTS_PAUSE_MS_KEY = "hbase.backup.attempts.pause.ms";
   public static final int DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS = 10000;
-
-
   /*
    * Drivers option list
    */
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
index 75e0ab7..7ea923e 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
@@ -53,8 +53,8 @@ import org.apache.hadoop.hbase.backup.BackupRequest;
 import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
 import org.apache.hadoop.hbase.backup.BackupRestoreConstants.BackupCommand;
 import org.apache.hadoop.hbase.backup.BackupType;
-import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.backup.util.BackupSet;
+import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
@@ -114,9 +114,12 @@ public final class BackupCommands {
   public static abstract class Command extends Configured {
     CommandLine cmdline;
-
+    Connection conn;
     Command(Configuration conf) {
-      super(conf);
+      if (conf == null) {
+        conf = HBaseConfiguration.create();
+      }
+      setConf(conf);
     }
 
     public void execute() throws IOException {
@@ -124,9 +127,34 @@
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+
+      // Create connection
+      conn = ConnectionFactory.createConnection(getConf());
+      if (requiresNoActiveSession()) {
+        // Check active session
+        try (BackupSystemTable table = new BackupSystemTable(conn);) {
+          List<BackupInfo> sessions = table.getBackupInfos(BackupState.RUNNING);
+
+          if (sessions.size() > 0) {
+            System.err.println("Found backup session in a RUNNING state: ");
+            System.err.println(sessions.get(0));
+            System.err.println("This may indicate that a previous session has failed abnormally.");
+            System.err.println("In this case, backup recovery is recommended.");
+            throw new IOException("Active session found, aborted command execution");
+          }
+        }
+      }
     }
 
     protected abstract void printUsage();
+
+    /**
+     * A command that returns true here cannot run while an active backup
+     * session is in progress.
+     * @return true if the command requires that no active session exists
+     */
+    protected boolean requiresNoActiveSession() {
+      return false;
+    }
   }
 
   private BackupCommands() {
@@ -178,8 +206,12 @@
     }
 
     @Override
+    protected boolean requiresNoActiveSession() {
+      return true;
+    }
+
+    @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -202,8 +234,8 @@
         throw new IOException(INCORRECT_USAGE);
       }
 
+      String tables = null;
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
 
       // Check if we have both: backup set and list of tables
       if (cmdline.hasOption(OPTION_TABLE) && cmdline.hasOption(OPTION_SET)) {
@@ -212,12 +244,13 @@
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
-
+      // Creates connection
+      super.execute();
       // Check backup set
       String setName = null;
       if (cmdline.hasOption(OPTION_SET)) {
         setName = cmdline.getOptionValue(OPTION_SET);
-        tables = getTablesForSet(setName, conf);
+        tables = getTablesForSet(setName, getConf());
 
         if (tables == null) {
           System.out.println("ERROR: Backup set '" + setName
@@ -235,8 +268,7 @@
           cmdline.hasOption(OPTION_WORKERS) ? Integer.parseInt(cmdline
               .getOptionValue(OPTION_WORKERS)) : -1;
 
-      try (Connection conn = ConnectionFactory.createConnection(getConf());
-          BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         BackupRequest.Builder builder = new BackupRequest.Builder();
         BackupRequest request =
             builder.withBackupType(BackupType.valueOf(args[1].toUpperCase()))
@@ -268,8 +300,7 @@
     }
 
     private String getTablesForSet(String name, Configuration conf) throws IOException {
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable table = new BackupSystemTable(conn)) {
+      try (final BackupSystemTable table = new BackupSystemTable(conn)) {
         List<TableName> tables = table.describeBackupSet(name);
         if (tables == null) return null;
         return StringUtils.join(tables, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND);
@@ -304,7 +335,6 @@
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -359,7 +389,6 @@
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -370,10 +399,10 @@
         throw new IOException(INCORRECT_USAGE);
       }
 
+      super.execute();
+
       String backupId = args[1];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+      try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
         BackupInfo info = sysTable.readBackupInfo(backupId);
         if (info == null) {
           System.out.println("ERROR: " + backupId + " does not exist");
@@ -399,7 +428,6 @@
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null || cmdline.getArgs().length == 1) {
         System.out.println("No backup id was specified, "
@@ -412,10 +440,10 @@
         throw new IOException(INCORRECT_USAGE);
       }
 
+      super.execute();
+
       String backupId = (args == null || args.length <= 1) ? null : args[1];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+      try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
         BackupInfo info = null;
 
         if (backupId != null) {
@@ -456,19 +484,23 @@
     }
 
     @Override
+    protected boolean requiresNoActiveSession() {
+      return true;
+    }
+
+    @Override
     public void execute() throws IOException {
-      super.execute();
       if (cmdline == null || cmdline.getArgs() == null || cmdline.getArgs().length < 2) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
 
+      super.execute();
+
       String[] args = cmdline.getArgs();
       String[] backupIds = new String[args.length - 1];
       System.arraycopy(args, 1, backupIds, 0, backupIds.length);
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         int deleted = admin.deleteBackups(backupIds);
         System.out.println("Deleted " + deleted + " backups. Total requested: " + args.length);
       }
@@ -512,7 +544,6 @@
 
     @Override
     public void execute() throws IOException {
-      super.execute();
 
       int n = parseHistoryLength();
       final TableName tableName = getTableName();
@@ -535,18 +566,16 @@
       };
       Path backupRootPath = getBackupRootPath();
       List<BackupInfo> history = null;
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
       if (backupRootPath == null) {
         // Load from backup system table
-        try (final Connection conn = ConnectionFactory.createConnection(conf);
-            final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
-
+        super.execute();
+        try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
           history = sysTable.getBackupHistory(n, tableNameFilter, tableSetFilter);
         }
       } else {
         // load from backup FS
         history =
-            BackupUtils.getHistory(conf, n, backupRootPath, tableNameFilter, tableSetFilter);
+            BackupUtils.getHistory(getConf(), n, backupRootPath, tableNameFilter, tableSetFilter);
       }
       for (BackupInfo info : history) {
         System.out.println(info.getShortDescription());
@@ -627,7 +656,6 @@
 
     @Override
     public void execute() throws IOException {
-      super.execute();
       // Command-line must have at least one element
       if (cmdline == null || cmdline.getArgs() == null || cmdline.getArgs().length < 2) {
         printUsage();
         throw new IOException(INCORRECT_USAGE);
@@ -661,11 +689,11 @@
     }
 
     private void processSetList(String[] args) throws IOException {
+      super.execute();
+
       // List all backup set names
       // does not expect any args
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         List<BackupSet> list = admin.listBackupSets();
         for (BackupSet bs : list) {
           System.out.println(bs);
@@ -678,10 +706,10 @@
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
+
       String setName = args[2];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+      try (final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
         List<TableName> tables = sysTable.describeBackupSet(setName);
         BackupSet set = tables == null ? null : new BackupSet(setName, tables);
         if (set == null) {
@@ -697,10 +725,10 @@
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
+
       String setName = args[2];
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         boolean result = admin.deleteBackupSet(setName);
         if (result) {
           System.out.println("Delete set " + setName + " OK.");
@@ -715,13 +743,12 @@
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
 
       String setName = args[2];
       String[] tables = args[3].split(",");
       TableName[] tableNames = toTableNames(tables);
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         admin.removeFromBackupSet(setName, tableNames);
       }
     }
@@ -739,15 +766,15 @@
         printUsage();
         throw new IOException(INCORRECT_USAGE);
       }
+      super.execute();
+
       String setName = args[2];
       String[] tables = args[3].split(",");
       TableName[] tableNames = new TableName[tables.length];
       for (int i = 0; i < tables.length; i++) {
         tableNames[i] = TableName.valueOf(tables[i]);
       }
-      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
-      try (final Connection conn = ConnectionFactory.createConnection(conf);
-          final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
+      try (final BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         admin.addToBackupSet(setName, tableNames);
       }
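Note on the BackupCommands changes above: the base Command class now follows a template-method shape. Command.execute() owns Connection creation plus an optional "no RUNNING session" guard read from the backup system table, and destructive commands (create, delete) opt in by overriding requiresNoActiveSession(). Every subclass validates its arguments before calling super.execute(), so a usage error never opens a connection. A minimal, HBase-free Java sketch of the shape (all class names here are illustrative, not part of the patch):

    import java.io.IOException;

    // Illustrative sketch of the guard pattern used by BackupCommands.Command.
    abstract class GuardedCommand {
      // Shared resource created once by the base class (stands in for Connection).
      protected AutoCloseable conn;

      // Subclasses override this to opt into the "no active session" precondition.
      protected boolean requiresNoActiveSession() {
        return false;
      }

      // Stands in for querying the backup system table for RUNNING sessions.
      protected boolean activeSessionExists() {
        return false;
      }

      // Base execute(): acquire the shared resource, then enforce the guard.
      public void execute() throws IOException {
        conn = () -> { }; // placeholder resource
        if (requiresNoActiveSession() && activeSessionExists()) {
          throw new IOException("Active session found, aborted command execution");
        }
      }
    }

    class DeleteLikeCommand extends GuardedCommand {
      @Override
      protected boolean requiresNoActiveSession() {
        return true; // delete must not run concurrently with a backup
      }

      @Override
      public void execute() throws IOException {
        // validate arguments first, then run the guard exactly once
        super.execute();
        // ... do the work using this.conn ...
      }
    }

The single Connection owned by the base class is also what lets the try-with-resources blocks in the patch shrink to just the BackupAdminImpl/BackupSystemTable wrappers.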
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
index 217e750..870aca9 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupSystemTable.java
@@ -1302,9 +1302,9 @@
     return getTableName(conf).getNameAsString();
   }
 
-
-
-
+  public static String getSnapshotName(Configuration conf) {
+    return "snapshot_" + getTableNameAsString(conf).replace(":", "_");
+  }
 
   /**
    * Creates Put operation for a given backup info object
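The new BackupSystemTable.getSnapshotName() pins the recovery snapshot to a deterministic name derived from the system table's name; the replace(":", "_") matters because a namespace-qualified HBase table name contains a colon, which is not a legal character in a snapshot name. A tiny standalone illustration of the mapping (the sample table names below are hypothetical):

    public class SnapshotNameDemo {
      // Mirrors the derivation in BackupSystemTable.getSnapshotName():
      // prefix with "snapshot_" and replace the namespace separator ':' with '_'.
      static String snapshotNameFor(String tableNameAsString) {
        return "snapshot_" + tableNameAsString.replace(":", "_");
      }

      public static void main(String[] args) {
        System.out.println(snapshotNameFor("backup:system")); // snapshot_backup_system
        System.out.println(snapshotNameFor("hbase_backup"));  // snapshot_hbase_backup
      }
    }

Because the name is a pure function of the configuration, a later failed session can find, restore, and delete the same snapshot without any extra bookkeeping.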
+ LOG.info("Execute roll log procedure for full backup ..."); + + Map props = new HashMap(); + props.put("backupRoot", backupInfo.getBackupRootDir()); + admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE, + LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props); + + newTimestamps = backupManager.readRegionServerLastLogRollResult(); + if (firstBackup) { + // Updates registered log files + // We record ALL old WAL files as registered, because + // this is a first full backup in the system and these + // files are not needed for next incremental backup + List logFiles = BackupUtils.getWALFilesOlderThan(conf, newTimestamps); + backupManager.recordWALFiles(logFiles); + } + + // SNAPSHOT_TABLES: + backupInfo.setPhase(BackupPhase.SNAPSHOT); + for (TableName tableName : tableList) { + String snapshotName = + "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime()) + "_" + + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString(); + + snapshotTable(admin, tableName, snapshotName); + backupInfo.setSnapshotName(tableName, snapshotName); + } + + // SNAPSHOT_COPY: + // do snapshot copy + LOG.debug("snapshot copy for " + backupId); + snapshotCopy(backupInfo); + // Updates incremental backup table set + backupManager.addIncrementalBackupTableSet(backupInfo.getTables()); + + // BACKUP_COMPLETE: + // set overall backup status: complete. Here we make sure to complete the backup. + // After this checkpoint, even if entering cancel process, will let the backup finished + backupInfo.setState(BackupState.COMPLETE); + // The table list in backupInfo is good for both full backup and incremental backup. + // For incremental backup, it contains the incremental backup table set. + backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps); + + HashMap> newTableSetTimestampMap = + backupManager.readLogTimestampMap(); + + Long newStartCode = + BackupUtils.getMinValue(BackupUtils + .getRSLogTimestampMins(newTableSetTimestampMap)); + backupManager.writeBackupStartCode(newStartCode); + + // backup complete + completeBackup(conn, backupInfo, backupManager, BackupType.FULL, conf); + } catch (Exception e) { + failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ", + BackupType.FULL, conf); + throw new IOException(e); + } + + } + + @Override + @VisibleForTesting + protected void executeForTesting() throws IOException + { + + // Get the stage ID to fail on try (Admin admin = conn.getAdmin();) { // Begin BACKUP beginBackup(backupManager, backupInfo); + + failStageIf(0); + String savedStartCode = null; boolean firstBackup = false; // do snapshot for full table backup @@ -124,6 +220,8 @@ public class FullTableBackupClient extends TableBackupClient { // while we do the backup. backupManager.writeBackupStartCode(0L); } + failStageIf(1); + // We roll log here before we do the snapshot. It is possible there is duplicate data // in the log that is already in the snapshot. But if we do it after the snapshot, we // could have data loss. 
@@ -136,6 +234,8 @@ public class FullTableBackupClient extends TableBackupClient { admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE, LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props); + failStageIf(2); + newTimestamps = backupManager.readRegionServerLastLogRollResult(); if (firstBackup) { // Updates registered log files @@ -156,6 +256,7 @@ public class FullTableBackupClient extends TableBackupClient { snapshotTable(admin, tableName, snapshotName); backupInfo.setSnapshotName(tableName, snapshotName); } + failStageIf(3); // SNAPSHOT_COPY: // do snapshot copy @@ -179,9 +280,12 @@ public class FullTableBackupClient extends TableBackupClient { BackupUtils.getMinValue(BackupUtils .getRSLogTimestampMins(newTableSetTimestampMap)); backupManager.writeBackupStartCode(newStartCode); + failStageIf(4); // backup complete completeBackup(conn, backupInfo, backupManager, BackupType.FULL, conf); + failStageIf(5); + } catch (Exception e) { failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ", BackupType.FULL, conf); diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java index 3003c93..fa90503 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java @@ -55,6 +55,8 @@ import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; import org.apache.hadoop.util.Tool; +import com.google.common.annotations.VisibleForTesting; + /** * Incremental backup implementation. * See the {@link #execute() execute} method. 
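The test hook here is deliberately simple: the full backup walks through numbered checkpoints, and failStageIf(n) throws exactly when the configured stage is reached, so one randomized test can exercise the failure/cleanup path after every checkpoint. A self-contained sketch of the mechanism (an illustrative harness, not the HBase classes; in the patch itself the checks live only on the executeForTesting() path):

    import java.io.IOException;

    // Minimal model of the staged failure injection in TableBackupClient.
    class StagedRun {
      private final boolean testMode;
      private final int failAtStage;

      StagedRun(boolean testMode, int failAtStage) {
        this.testMode = testMode;
        this.failAtStage = failAtStage;
      }

      // Mirrors failStageIf(int): throw exactly at the configured checkpoint.
      void failStageIf(int stage) throws IOException {
        if (testMode && stage == failAtStage) {
          throw new IOException("Stage " + stage);
        }
      }

      void execute() throws IOException {
        failStageIf(0); // after beginBackup
        failStageIf(1); // after writing the start code
        failStageIf(2); // after the log-roll procedure
        failStageIf(3); // after table snapshots
        failStageIf(4); // after writing the new start code
        failStageIf(5); // after completeBackup
      }

      public static void main(String[] args) {
        for (int stage = 0; stage <= 5; stage++) {
          try {
            new StagedRun(true, stage).execute();
          } catch (IOException e) {
            System.out.println("Injected failure at " + e.getMessage());
          }
        }
      }
    }

One caveat worth flagging in review: failStageIf(5) fires after completeBackup() has already run, so stage 5 injects a failure into a session that has already been marked COMPLETE.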
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index 3003c93..fa90503 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -55,6 +55,8 @@ import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.util.Tool;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Incremental backup implementation.
  * See the {@link #execute() execute} method.
@@ -207,18 +209,24 @@
 
   @Override
   public void execute() throws IOException {
-    // case PREPARE_INCREMENTAL:
-    beginBackup(backupManager, backupInfo);
-    backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
-    LOG.debug("For incremental backup, current table set is "
-        + backupManager.getIncrementalBackupTableSet());
+    if (isInTestMode()) {
+      executeForTesting();
+      return;
+    }
+
     try {
+      // case PREPARE_INCREMENTAL:
+      beginBackup(backupManager, backupInfo);
+      backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
+      LOG.debug("For incremental backup, current table set is "
+          + backupManager.getIncrementalBackupTableSet());
       newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
     } catch (Exception e) {
       // fail the overall backup and return
       failBackup(conn, backupInfo, backupManager, e, "Unexpected Exception : ",
         BackupType.INCREMENTAL, conf);
+      return;
     }
 
     // case INCREMENTAL_COPY:
@@ -267,6 +275,69 @@
     }
   }
 
+  @Override
+  @VisibleForTesting
+  protected void executeForTesting() throws IOException {
+
+    try {
+      // case PREPARE_INCREMENTAL:
+      beginBackup(backupManager, backupInfo);
+
+      failStageIf(0);
+
+      backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
+      LOG.debug("For incremental backup, current table set is "
+          + backupManager.getIncrementalBackupTableSet());
+      newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
+
+      failStageIf(1);
+
+      // case INCREMENTAL_COPY:
+      // copy out the table and region info files for each table
+      BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
+      // convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
+      convertWALsToHFiles(backupInfo);
+      incrementalCopyHFiles(backupInfo);
+      // Save list of WAL files copied
+      backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
+
+      failStageIf(2);
+
+      // case INCR_BACKUP_COMPLETE:
+      // set overall backup status: complete. Here we make sure to complete the backup.
+      // After this checkpoint, even if entering cancel process, will let the backup finished
+      backupInfo.setState(BackupState.COMPLETE);
+      // Set the previousTimestampMap which is before this current log roll to the manifest.
+      HashMap<TableName, HashMap<String, Long>> previousTimestampMap =
+          backupManager.readLogTimestampMap();
+      backupInfo.setIncrTimestampMap(previousTimestampMap);
+
+      // The table list in backupInfo is good for both full backup and incremental backup.
+      // For incremental backup, it contains the incremental backup table set.
+      backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+
+      failStageIf(3);
+
+      HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
+          backupManager.readLogTimestampMap();
+
+      Long newStartCode =
+          BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
+      backupManager.writeBackupStartCode(newStartCode);
+
+      handleBulkLoad(backupInfo.getTableNames());
+      // backup complete
+      completeBackup(conn, backupInfo, backupManager, BackupType.INCREMENTAL, conf);
+
+      failStageIf(4);
+
+    } catch (Exception e) {
+      failBackup(conn, backupInfo, backupManager, e, "Unexpected Exception : ",
+        BackupType.INCREMENTAL, conf);
+    }
+
+  }
+
   private void incrementalCopyHFiles(BackupInfo backupInfo) throws Exception {
     try {
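After a successful pass, the client persists a new backup "start code" computed as BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap)): the minimum, across region servers, of each server's minimum last-log-roll timestamp across tables. WALs older than this value are covered by some backup and may be released. A plain-Java model of just that arithmetic (the real helpers live in BackupUtils; the server and table names below are made up):

    import java.util.HashMap;
    import java.util.Map;

    // The input map is table -> (region server -> last log-roll timestamp).
    public class StartCodeDemo {

      // Per region server, the minimum timestamp across all tables
      // (models BackupUtils.getRSLogTimestampMins).
      static Map<String, Long> rsLogTimestampMins(Map<String, Map<String, Long>> byTable) {
        Map<String, Long> mins = new HashMap<>();
        for (Map<String, Long> rsToTs : byTable.values()) {
          for (Map.Entry<String, Long> e : rsToTs.entrySet()) {
            mins.merge(e.getKey(), e.getValue(), Math::min);
          }
        }
        return mins;
      }

      // Minimum over all region servers (models BackupUtils.getMinValue).
      static Long minValue(Map<String, Long> perRs) {
        Long min = null;
        for (Long v : perRs.values()) {
          if (min == null || v < min) {
            min = v;
          }
        }
        return min;
      }

      public static void main(String[] args) {
        Map<String, Map<String, Long>> byTable = new HashMap<>();
        Map<String, Long> t1 = new HashMap<>();
        t1.put("rs1", 100L);
        t1.put("rs2", 300L);
        Map<String, Long> t2 = new HashMap<>();
        t2.put("rs1", 200L);
        t2.put("rs2", 250L);
        byTable.put("t1", t1);
        byTable.put("t2", t2);
        // rs1 min = 100, rs2 min = 250 -> new start code = 100
        System.out.println(minValue(rsLogTimestampMins(byTable)));
      }
    }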
diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
index 125b5da..cd3bc74 100644
--- hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
@@ -43,6 +43,8 @@ import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Base class for backup operation. Concrete implementation for
  * full and incremental backup are delegated to corresponding sub-classes:
@@ -51,6 +53,10 @@
  */
 @InterfaceAudience.Private
 public abstract class TableBackupClient {
+
+  public static final String BACKUP_TEST_MODE = "backup.test.mode";
+  public static final String BACKUP_TEST_MODE_STAGE = "backup.test.mode.stage";
+
   private static final Log LOG = LogFactory.getLog(TableBackupClient.class);
 
   protected Configuration conf;
@@ -88,6 +94,9 @@
    */
   protected void beginBackup(BackupManager backupManager, BackupInfo backupInfo)
       throws IOException {
+
+    snapshotBackupTable();
+
     backupManager.setBackupInfo(backupInfo);
     // set the start timestamp of the overall backup
     long startTs = EnvironmentEdgeManager.currentTime();
@@ -116,7 +125,7 @@
    * @param backupInfo backup info
    * @throws Exception exception
    */
-  private void deleteSnapshot(final Connection conn, BackupInfo backupInfo, Configuration conf)
+  private static void deleteSnapshots(final Connection conn, BackupInfo backupInfo, Configuration conf)
       throws IOException {
     LOG.debug("Trying to delete snapshot for full backup.");
     for (String snapshotName : backupInfo.getSnapshotNames()) {
@@ -127,8 +136,6 @@
 
       try (Admin admin = conn.getAdmin();) {
         admin.deleteSnapshot(snapshotName);
-      } catch (IOException ioe) {
-        LOG.debug("when deleting snapshot " + snapshotName, ioe);
       }
       LOG.debug("Deleting the snapshot " + snapshotName + " for backup " + backupInfo.getBackupId()
           + " succeeded.");
@@ -140,7 +147,7 @@
    * snapshots.
    * @throws IOException exception
    */
-  private void cleanupExportSnapshotLog(Configuration conf) throws IOException {
+  private static void cleanupExportSnapshotLog(Configuration conf) throws IOException {
     FileSystem fs = FSUtils.getCurrentFileSystem(conf);
     Path stagingDir =
         new Path(conf.get(BackupRestoreConstants.CONF_STAGING_ROOT, fs.getWorkingDirectory()
@@ -163,7 +170,7 @@
    * Clean up the uncompleted data at target directory if the ongoing backup has already entered
    * the copy phase.
    */
-  private void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
+  private static void cleanupTargetDir(BackupInfo backupInfo, Configuration conf) {
     try {
       // clean up the uncompleted data at target directory if the ongoing backup has already entered
       // the copy phase
@@ -182,10 +189,10 @@
             new Path(HBackupFileSystem.getTableBackupDir(backupInfo.getBackupRootDir(),
               backupInfo.getBackupId(), table));
         if (outputFs.delete(targetDirPath, true)) {
-          LOG.info("Cleaning up uncompleted backup data at " + targetDirPath.toString()
+          LOG.debug("Cleaning up uncompleted backup data at " + targetDirPath.toString()
               + " done.");
         } else {
-          LOG.info("No data has been copied to " + targetDirPath.toString() + ".");
+          LOG.debug("No data has been copied to " + targetDirPath.toString() + ".");
         }
 
         Path tableDir = targetDirPath.getParent();
@@ -229,21 +236,63 @@
           + ",failedts=" + backupInfo.getCompleteTs() + ",failedphase=" + backupInfo.getPhase()
           + ",failedmessage=" + backupInfo.getFailedMsg();
     LOG.error(backupFailedData);
-
+    cleanupAndRestoreBackupSystem(conn, backupInfo, conf);
+    // If the backup session is updated to FAILED state, it means we
+    // have processed recovery already.
     backupManager.updateBackupInfo(backupInfo);
+    LOG.error("Backup " + backupInfo.getBackupId() + " failed.");
+  }
+
+  public static void cleanupAndRestoreBackupSystem(Connection conn, BackupInfo backupInfo,
+      Configuration conf) throws IOException {
+    BackupType type = backupInfo.getType();
+    // if full backup, then delete HBase snapshots if there already are snapshots taken
+    // and also clean up export snapshot log files if exist
+    if (type == BackupType.FULL) {
+      deleteSnapshots(conn, backupInfo, conf);
+      cleanupExportSnapshotLog(conf);
+    }
+    restoreBackupTable(conn, conf);
+    deleteBackupTableSnapshot(conn, conf);
+    // clean up the uncompleted data at target directory if the ongoing backup has already entered
+    // the copy phase
+    // For incremental backup, DistCp logs will be cleaned with the targetDir.
+    cleanupTargetDir(backupInfo, conf);
+  }
+
+  private void snapshotBackupTable() throws IOException {
+
+    try (Admin admin = conn.getAdmin();) {
+      admin.snapshot(BackupSystemTable.getSnapshotName(conf),
+        BackupSystemTable.getTableName(conf));
+    }
+  }
+
+  private static void restoreBackupTable(Connection conn, Configuration conf)
+      throws IOException {
+
+    LOG.debug("Restoring " + BackupSystemTable.getTableNameAsString(conf)
+        + " from snapshot");
+    try (Admin admin = conn.getAdmin();) {
+      String snapshotName = BackupSystemTable.getSnapshotName(conf);
+      admin.disableTable(BackupSystemTable.getTableName(conf));
+      admin.restoreSnapshot(snapshotName);
+      admin.enableTable(BackupSystemTable.getTableName(conf));
-    // if full backup, then delete HBase snapshots if there already are snapshots taken
-    // and also clean up export snapshot log files if exist
-    if (type == BackupType.FULL) {
-      deleteSnapshot(conn, backupInfo, conf);
-      cleanupExportSnapshotLog(conf);
     }
+    LOG.debug("Done restoring backup system table");
+  }
 
-    // clean up the uncompleted data at target directory if the ongoing backup has already entered
-    // the copy phase
-    // For incremental backup, DistCp logs will be cleaned with the targetDir.
-    cleanupTargetDir(backupInfo, conf);
-    LOG.info("Backup " + backupInfo.getBackupId() + " failed.");
+  private static void deleteBackupTableSnapshot(Connection conn, Configuration conf)
+      throws IOException {
+    LOG.debug("Deleting " + BackupSystemTable.getSnapshotName(conf)
+        + " from the system");
+    try (Admin admin = conn.getAdmin();) {
+      String snapshotName = BackupSystemTable.getSnapshotName(conf);
+      admin.deleteSnapshot(snapshotName);
+    }
+    LOG.debug("Done deleting backup system table snapshot");
   }
 
   /**
@@ -366,11 +415,12 @@
     // - clean up directories with prefix "exportSnapshot-", which are generated when exporting
     // snapshots
     if (type == BackupType.FULL) {
-      deleteSnapshot(conn, backupInfo, conf);
+      deleteSnapshots(conn, backupInfo, conf);
       cleanupExportSnapshotLog(conf);
     } else if (type == BackupType.INCREMENTAL) {
       cleanupDistCpLog(backupInfo, conf);
     }
+    deleteBackupTableSnapshot(conn, conf);
     LOG.info("Backup " + backupInfo.getBackupId() + " completed.");
   }
 
@@ -380,4 +430,28 @@
    */
   public abstract void execute() throws IOException;
 
+  @VisibleForTesting
+  protected abstract void executeForTesting() throws IOException;
+
+  @VisibleForTesting
+  protected boolean isInTestMode() {
+    return conf.getBoolean(BACKUP_TEST_MODE, false);
+  }
+
+  @VisibleForTesting
+  protected int getTestStageId() {
+    return conf.getInt(BACKUP_TEST_MODE_STAGE, 0);
+  }
+
+  @VisibleForTesting
+  protected void failStageIf(int stage) throws IOException {
+    int current = getTestStageId();
+    if (current == stage) {
+      throw new IOException("Stage " + stage);
+    }
+  }
+
 }
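The fault-tolerance core of this patch is snapshot-bracketing of the backup system table: beginBackup() snapshots it before any bookkeeping is written, the failure path restores it from that snapshot so the metadata rolls back together with the failed session, and both the success path (completeBackup) and the failure path (cleanupAndRestoreBackupSystem) delete the snapshot afterwards. A compact, HBase-free sketch of the bracket (the SystemTable interface here is illustrative):

    import java.io.IOException;

    // Illustrative stand-in for the backup system table operations used
    // by TableBackupClient; not an HBase API.
    interface SystemTable {
      void snapshot() throws IOException;            // beginBackup(): snapshotBackupTable()
      void restoreFromSnapshot() throws IOException; // failBackup(): restoreBackupTable()
      void deleteSnapshot() throws IOException;      // both paths: deleteBackupTableSnapshot()
    }

    class SnapshotBracket {
      static void runBackup(SystemTable table, Runnable backupBody) throws IOException {
        table.snapshot(); // guard taken before any bookkeeping is written
        try {
          backupBody.run();
          // success: bookkeeping is kept, guard is discarded
          table.deleteSnapshot();
        } catch (RuntimeException e) {
          // failure: roll the bookkeeping back to the pre-backup state,
          // then discard the guard
          table.restoreFromSnapshot();
          table.deleteSnapshot();
          throw new IOException("backup failed, system table restored", e);
        }
      }
    }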
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
new file mode 100644
index 0000000..ba2ae97
--- /dev/null
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(LargeTests.class)
+public class TestFullBackupWithFailures extends TestBackupBase {
+
+  private static final Log LOG = LogFactory.getLog(TestFullBackupWithFailures.class);
+
+  @Test
+  public void testFullBackupWithFailures() throws Exception {
+    conf1.setBoolean(TableBackupClient.BACKUP_TEST_MODE, true);
+    // Fail at a random stage between 0 and 5 inclusive
+    int stage = (new Random()).nextInt(6);
+    LOG.info("Running stage " + stage);
+    runBackupAndFailAtStage(stage);
+  }
+
+  public void runBackupAndFailAtStage(int stage) throws Exception {
+    conf1.setInt(TableBackupClient.BACKUP_TEST_MODE_STAGE, stage);
+    try (BackupSystemTable table = new BackupSystemTable(TEST_UTIL.getConnection())) {
+      int before = table.getBackupHistory().size();
+      String[] args =
+          new String[] { "create", "full", BACKUP_ROOT_DIR, "-t",
+              table1.getNameAsString() + "," + table2.getNameAsString() };
+      // Run backup
+      int ret = ToolRunner.run(conf1, new BackupDriver(), args);
+      assertFalse(ret == 0);
+      List<BackupInfo> backups = table.getBackupHistory();
+      int after = table.getBackupHistory().size();
+
+      assertTrue(after == before + 1);
+      for (BackupInfo data : backups) {
+        String backupId = data.getBackupId();
+        assertFalse(checkSucceeded(backupId));
+      }
+      Set<TableName> tables = table.getIncrementalBackupTableSet(BACKUP_ROOT_DIR);
+      assertTrue(tables.size() == 0);
+    }
+  }
+
+}
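Since the failing stage is drawn at random, each run of testFullBackupWithFailures() covers only one of the six checkpoints. A deterministic sweep would be a natural companion; a hypothetical method that could be added to this test class (it reuses runBackupAndFailAtStage() above and assumes the 0..5 bound stays in sync with the failStageIf() calls in FullTableBackupClient.executeForTesting()):

      // Hypothetical exhaustive variant: drive the helper through every
      // checkpoint instead of sampling one at random.
      @Test
      public void testFullBackupFailsAtEveryStage() throws Exception {
        conf1.setBoolean(TableBackupClient.BACKUP_TEST_MODE, true);
        for (int stage = 0; stage <= 5; stage++) {
          LOG.info("Running stage " + stage);
          runBackupAndFailAtStage(stage);
        }
      }

Each iteration asserts the same recovery invariants: the driver exits nonzero, exactly one new FAILED session row is left in the history, and the incremental backup table set is empty again after the system table is restored from its snapshot.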