From ba4a32fb6391f899871eee824cb5193c416c561d Mon Sep 17 00:00:00 2001
From: Vladimir Rodionov
Date: Wed, 21 Feb 2018 17:14:15 -0800
Subject: [PATCH] HBASE-17825: Backup further optimizations

---
 .../backup/impl/IncrementalTableBackupClient.java  | 13 ++--
 .../backup/mapreduce/MapReduceBackupMergeJob.java  | 61 +++++++++++++++--
 .../TestIncrementalBackupMergeWithFailures.java    |  6 +-
 .../hadoop/hbase/mapreduce/HFileOutputFormat2.java | 18 ++++-
 .../apache/hadoop/hbase/mapreduce/WALPlayer.java   | 78 +++++++++++++---------
 .../hadoop/hbase/util/FSTableDescriptors.java      |  6 +-
 6 files changed, 130 insertions(+), 52 deletions(-)

diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index c897ae22a7..6724459493 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -374,14 +374,17 @@ public class IncrementalTableBackupClient extends TableBackupClient {
     Set<TableName> tableSet = backupManager.getIncrementalBackupTableSet();
     // filter missing files out (they have been copied by previous backups)
     incrBackupFileList = filterMissingFiles(incrBackupFileList);
+    List<String> tableList = new ArrayList<String>();
     for (TableName table : tableSet) {
       // Check if table exists
       if (tableExists(table, conn)) {
-        walToHFiles(incrBackupFileList, table);
+        tableList.add(table.getNameAsString());
       } else {
         LOG.warn("Table " + table + " does not exists. Skipping in WAL converter");
       }
     }
+    walToHFiles(incrBackupFileList, tableList);
+
   }
 
   protected boolean tableExists(TableName table, Connection conn) throws IOException {
@@ -390,20 +393,20 @@ public class IncrementalTableBackupClient extends TableBackupClient {
     }
   }
 
-  protected void walToHFiles(List<String> dirPaths, TableName tableName) throws IOException {
+  protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws IOException {
     Tool player = new WALPlayer();
 
     // Player reads all files in arbitrary directory structure and creates
     // a Map task for each file. We use ';' as separator
     // because WAL file names contains ','
     String dirs = StringUtils.join(dirPaths, ';');
-    String jobname = "Incremental_Backup-" + backupId + "-" + tableName.getNameAsString();
+    String jobname = "Incremental_Backup-" + backupId;
 
-    Path bulkOutputPath = getBulkOutputDirForTable(tableName);
+    Path bulkOutputPath = getBulkOutputDir();
     conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
     conf.set(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ";");
     conf.set(JOB_NAME_CONF_KEY, jobname);
-    String[] playerArgs = { dirs, tableName.getNameAsString() };
+    String[] playerArgs = { dirs, StringUtils.join(tableList, ",") };
 
     try {
       player.setConf(conf);
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java
index 6f2c44c4c7..71eeeedfc8 100644
--- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java
+++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceBackupMergeJob.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.backup.mapreduce;
 
 import static org.apache.hadoop.hbase.backup.util.BackupUtils.succeeded;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -29,7 +30,9 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupInfo;
 import org.apache.hadoop.hbase.backup.BackupMergeJob;
@@ -40,6 +43,8 @@ import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
 import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.util.Tool;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -113,6 +118,7 @@ public class MapReduceBackupMergeJob implements BackupMergeJob {
         // Find input directories for table
         Path[] dirPaths = findInputDirectories(fs, backupRoot, tableNames[i], backupIds);
         String dirs = StringUtils.join(dirPaths, ",");
+
         Path bulkOutputPath = BackupUtils.getBulkOutputDir(
           BackupUtils.getFileNameCompatibleString(tableNames[i]), getConf(), false);
 
@@ -243,19 +249,60 @@ public class MapReduceBackupMergeJob implements BackupMergeJob {
   protected void moveData(FileSystem fs, String backupRoot, Path bulkOutputPath,
       TableName tableName, String mergedBackupId) throws IllegalArgumentException, IOException {
     Path dest =
-        new Path(HBackupFileSystem.getTableBackupDataDir(backupRoot, mergedBackupId, tableName));
+        new Path(HBackupFileSystem.getTableBackupDir(backupRoot, mergedBackupId, tableName));
 
-    // Delete all in dest
-    if (!fs.delete(dest, true)) {
+    // Delete all *data* files in dest
+    if (!deleteData(fs, dest)) {
       throw new IOException("Could not delete " + dest);
     }
 
     FileStatus[] fsts = fs.listStatus(bulkOutputPath);
     for (FileStatus fst : fsts) {
       if (fst.isDirectory()) {
-        fs.rename(fst.getPath().getParent(), dest);
+        String family = fst.getPath().getName();
+        Path newDst = new Path(dest, family);
+        if (fs.exists(newDst)) {
+          if (!fs.delete(newDst, true)) {
+            throw new IOException("failed to delete :"+ newDst);
+          }
+        }
+        fs.rename(fst.getPath(), dest);
+      }
+    }
+  }
+
+  /**
+   * Deletes only data files and keeps all META
+   * @param fs file system instance
+   * @param dest destination location
+   * @return true, if success, false - otherwise
+   * @throws FileNotFoundException
+   * @throws IOException
+   */
+  private boolean deleteData(FileSystem fs, Path dest) throws FileNotFoundException, IOException {
+    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dest, true);
+    List<Path> toDelete = new ArrayList<Path>();
+    while (it.hasNext()) {
+      Path p = it.next().getPath();
+      if (fs.isDirectory(p)) {
+        continue;
+      }
+      // Keep meta
+      String fileName = p.toString();
+      if (fileName.indexOf(FSTableDescriptors.TABLEINFO_DIR) > 0 ||
+          fileName.indexOf(HRegionFileSystem.REGION_INFO_FILE) > 0 ) {
+        continue;
+      }
+      toDelete.add(p);
+    }
+
+    for (Path p : toDelete) {
+      boolean result = fs.delete(p, false);
+      if (!result) {
+        return false;
       }
     }
+    return true;
   }
 
   protected String findMostRecentBackupId(String[] backupIds) {
@@ -291,12 +338,12 @@ public class MapReduceBackupMergeJob implements BackupMergeJob {
 
     for (String backupId : backupIds) {
       Path fileBackupDirPath =
-          new Path(HBackupFileSystem.getTableBackupDataDir(backupRoot, backupId, tableName));
+          new Path(HBackupFileSystem.getTableBackupDir(backupRoot, backupId, tableName));
       if (fs.exists(fileBackupDirPath)) {
         dirs.add(fileBackupDirPath);
       } else {
-        if (LOG.isTraceEnabled()) {
-          LOG.trace("File: " + fileBackupDirPath + " does not exist.");
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("File: " + fileBackupDirPath + " does not exist.");
         }
       }
     }
diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupMergeWithFailures.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupMergeWithFailures.java
index 83fb29e3d5..d0c7484b34 100644
--- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupMergeWithFailures.java
+++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupMergeWithFailures.java
@@ -27,7 +27,9 @@ import java.util.List;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
@@ -254,7 +256,7 @@ public class TestIncrementalBackupMergeWithFailures extends TestBackupBase {
     request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
     String backupIdIncMultiple2 = client.backupTables(request);
     assertTrue(checkSucceeded(backupIdIncMultiple2));
-    
+
     // #4 Merge backup images with failures
 
     for (FailurePhase phase : FailurePhase.values()) {
@@ -294,7 +296,7 @@ public class TestIncrementalBackupMergeWithFailures extends TestBackupBase {
         LOG.debug("Expected :"+ e.getMessage());
       }
     }
-    
+
     // Now merge w/o failures
     Configuration conf = conn.getConfiguration();
     conf.unset(FAILURE_PHASE_KEY);
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
index 60352ead23..3b04c0b00b 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat2.java
@@ -258,15 +258,19 @@ public class HFileOutputFormat2
         } else {
           tableNameBytes = Bytes.toBytes(writeTableNames);
         }
+        String tableName = Bytes.toString(tableNameBytes);
+        Path tableRelPath = getTableRelativePath(tableNameBytes);
         byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
+
         WriterLength wl = this.writers.get(tableAndFamily);
 
         // If this is a new column family, verify that the directory exists
         if (wl == null) {
           Path writerPath = null;
           if (writeMultipleTables) {
-            writerPath = new Path(outputDir, new Path(Bytes.toString(tableNameBytes), Bytes
+            writerPath = new Path(outputDir,new Path(tableRelPath, Bytes
                     .toString(family)));
+
           }
           else {
             writerPath = new Path(outputDir, Bytes.toString(family));
@@ -289,7 +293,6 @@ public class HFileOutputFormat2
 
           if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
             HRegionLocation loc = null;
-            String tableName = Bytes.toString(tableNameBytes);
             if (tableName != null) {
               try (Connection connection = ConnectionFactory.createConnection(conf);
                   RegionLocator locator =
@@ -341,6 +344,15 @@ public class HFileOutputFormat2
         this.previousRow = rowKey;
       }
 
+      private Path getTableRelativePath(byte[] tableNameBytes) {
+        String tableName = Bytes.toString(tableNameBytes);
+        String[] tableNameParts = tableName.split(":");
+        Path tableRelPath = new Path(tableName.split(":")[0]);
+        if (tableNameParts.length > 1) {
+          tableRelPath = new Path(tableRelPath, tableName.split(":")[1]);
+        }
+        return tableRelPath;
+      }
       private void rollWriters(WriterLength writerLength) throws IOException {
         if (writerLength != null) {
           closeWriter(writerLength);
@@ -376,7 +388,7 @@ public class HFileOutputFormat2
       Path familydir = new Path(outputDir, Bytes.toString(family));
       if (writeMultipleTables) {
         familydir = new Path(outputDir,
-                new Path(Bytes.toString(tableName), Bytes.toString(family)));
+                new Path(getTableRelativePath(tableName), Bytes.toString(family)));
       }
       WriterLength wl = new WriterLength();
       Algorithm compression = compressionMap.get(tableAndFamily);
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
index 97ae81c018..58becb5d83 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
@@ -20,7 +20,11 @@ package org.apache.hadoop.hbase.mapreduce;
 import java.io.IOException;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
@@ -38,6 +42,7 @@ import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionLocator;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2.TableInfo;
 import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.MapReduceExtendedCell;
@@ -72,6 +77,7 @@ public class WALPlayer extends Configured implements Tool {
   public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
   public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
   public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";
+  protected static final String tableSeparator = ";";
 
   // This relies on Hadoop Configuration to handle warning about deprecated configs and
@@ -97,38 +103,34 @@ public class WALPlayer extends Configured implements Tool {
    */
   static class WALKeyValueMapper
     extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Cell> {
-    private byte[] table;
+    private Set<String> tableSet = new HashSet<String>();
 
-    @Override
-    public void map(WALKey key, WALEdit value,
-      Context context)
-    throws IOException {
-      try {
-        // skip all other tables
-        if (Bytes.equals(table, key.getTableName().getName())) {
-          for (Cell cell : value.getCells()) {
-            if (WALEdit.isMetaEditFamily(cell)) {
-              continue;
+    @Override
+    public void map(WALKey key, WALEdit value, Context context) throws IOException {
+      try {
+        // skip all other tables
+        TableName table = key.getTableName();
+        if (tableSet.contains(table.getNameAsString())) {
+          for (Cell cell : value.getCells()) {
+            if (WALEdit.isMetaEditFamily(cell)) {
+              continue;
+            }
+            byte[] outKey =
+                Bytes.add(table.getName(), Bytes.toBytes(tableSeparator), CellUtil.cloneRow(cell));
+            context.write(new ImmutableBytesWritable(outKey), new MapReduceExtendedCell(cell));
             }
-            context.write(new ImmutableBytesWritable(CellUtil.cloneRow(cell)),
-              new MapReduceExtendedCell(cell));
           }
-        }
-      } catch (InterruptedException e) {
-        e.printStackTrace();
+      } catch (InterruptedException e) {
+        e.printStackTrace();
       }
     }
 
     @Override
     public void setup(Context context) throws IOException {
-      // only a single table is supported when HFiles are generated with HFileOutputFormat
       String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
-      if (tables == null || tables.length != 1) {
-        // this can only happen when WALMapper is used directly by a class other than WALPlayer
-        throw new IOException("Exactly one table must be specified for bulk HFile case.");
+      for(String table: tables) {
+        tableSet.add(table);
       }
-      table = Bytes.toBytes(tables[0]);
-
     }
 
   }
@@ -283,7 +285,8 @@ public class WALPlayer extends Configured implements Tool {
     conf.setStrings(TABLES_KEY, tables);
     conf.setStrings(TABLE_MAP_KEY, tableMap);
     conf.set(FileInputFormat.INPUT_DIR, inputDirs);
-    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
+    Job job =
+        Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
     job.setJarByClass(WALPlayer.class);
     job.setInputFormatClass(WALInputFormat.class);
@@ -294,22 +297,24 @@ public class WALPlayer extends Configured implements Tool {
       LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
 
       // the bulk HFile case
-      if (tables.length != 1) {
-        throw new IOException("Exactly one table must be specified for the bulk export option");
-      }
-      TableName tableName = TableName.valueOf(tables[0]);
+      List<TableName> tableNames = getTableNameList(tables);
+
       job.setMapperClass(WALKeyValueMapper.class);
       job.setReducerClass(CellSortReducer.class);
       Path outputDir = new Path(hfileOutPath);
       FileOutputFormat.setOutputPath(job, outputDir);
       job.setMapOutputValueClass(MapReduceExtendedCell.class);
-      try (Connection conn = ConnectionFactory.createConnection(conf);
+      try (Connection conn = ConnectionFactory.createConnection(conf);) {
+        List<TableInfo> tableInfoList = new ArrayList<TableInfo>();
+        for (TableName tableName : tableNames) {
           Table table = conn.getTable(tableName);
-          RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
-        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
+          RegionLocator regionLocator = conn.getRegionLocator(tableName);
+          tableInfoList.add(new TableInfo(table.getDescriptor(), regionLocator));
+        }
+        MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfoList);
       }
       TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
-        org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
+          org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
     } else {
       // output to live cluster
       job.setMapperClass(WALMapper.class);
@@ -321,7 +326,8 @@ public class WALPlayer extends Configured implements Tool {
     }
     String codecCls = WALCellCodec.getWALCellCodecClass(conf);
     try {
-      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Class.forName(codecCls));
+      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
+        Class.forName(codecCls));
     } catch (Exception e) {
       throw new IOException("Cannot determine wal codec class " + codecCls, e);
     }
@@ -329,6 +335,14 @@ public class WALPlayer extends Configured implements Tool {
   }
 
+  private List<TableName> getTableNameList(String[] tables) {
+    List<TableName> list = new ArrayList<TableName>();
+    for(String name : tables) {
+      list.add(TableName.valueOf(name));
+    }
+    return list;
+  }
+
   /**
    * Print usage
    * @param errorMsg Error message. Can be null.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 49ed11a430..9c4d1a1a81 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -88,9 +88,9 @@ public class FSTableDescriptors implements TableDescriptors {
   /**
   * The file name prefix used to store HTD in HDFS
   */
-  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
-  static final String TABLEINFO_DIR = ".tabledesc";
-  static final String TMP_DIR = ".tmp";
+  public static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
+  public static final String TABLEINFO_DIR = ".tabledesc";
+  public static final String TMP_DIR = ".tmp";
 
   // This cache does not age out the old stuff. Thinking is that the amount
   // of data we keep up in here is so small, no need to do occasional purge.
-- 
2.11.0 (Apple Git-81)