diff --git a/conf/hbase-site.xml b/conf/hbase-site.xml
index c516ac7..aad015e 100644
--- a/conf/hbase-site.xml
+++ b/conf/hbase-site.xml
@@ -21,4 +21,18 @@
  */
 -->
 <configuration>
+  <property>
+    <name>hbase.rootdir</name>
+    <value>hdfs://localhost:9000/hbase</value>
+    <description>The directory shared by RegionServers.</description>
+  </property>
+
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>true</value>
+    <description>The mode the cluster will be in. Possible values are
+      false: standalone and pseudo-distributed setups with managed Zookeeper
+      true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)
+    </description>
+  </property>
 </configuration>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
index f9cc60f..8ec7e0b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
@@ -32,6 +32,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.master.cleaner.FileCleanerDelegate;
+import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -56,6 +59,10 @@ public class HFileArchiver {
   /** Number of retries in case of fs operation failure */
   private static final int DEFAULT_RETRIES_NUMBER = 3;
 
+  private static final String HFILE_SKIP_ARCHIVE_CONF = "hbase.hfile.skip.archive";
+
+  private static SnapshotHFileCleaner snapshotCleaner = null;
+  private static HFileLinkCleaner fileLinkCleaner = null;
 
   private HFileArchiver() {
     // hidden ctor since this is just a util
@@ -72,12 +79,13 @@ public class HFileArchiver {
   public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo info)
       throws IOException {
     Path rootDir = FSUtils.getRootDir(conf);
-    archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
+    archiveRegion(conf, fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
       HRegion.getRegionDir(rootDir, info));
   }
 
   /**
    * Remove an entire region from the table directory via archiving the region's hfiles.
+   * @param conf {@link Configuration} to check for the skip-archive setting
    * @param fs {@link FileSystem} from which to remove the region
    * @param rootdir {@link Path} to the root directory where hbase files are stored (for building
    *          the archive path)
@@ -87,7 +95,7 @@
    *           operations could not complete.
    * @throws IOException if the request cannot be completed
    */
-  public static boolean archiveRegion(FileSystem fs, Path rootdir, Path tableDir, Path regionDir)
+  public static boolean archiveRegion(Configuration conf, FileSystem fs, Path rootdir, Path tableDir, Path regionDir)
       throws IOException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("ARCHIVING " + regionDir.toString());
@@ -134,7 +142,7 @@
     LOG.debug("Archiving " + toArchive);
     boolean success = false;
     try {
-      success = resolveAndArchive(fs, regionArchiveDir, toArchive);
+      success = resolveAndArchive(conf, fs, regionArchiveDir, toArchive);
     } catch (IOException e) {
       LOG.error("Failed to archive " + toArchive, e);
       success = false;
@@ -174,7 +182,7 @@
     Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, parent, tableDir, family);
 
     // do the actual archive
-    if (!resolveAndArchive(fs, storeArchiveDir, toArchive)) {
+    if (!resolveAndArchive(conf, fs, storeArchiveDir, toArchive)) {
       throw new IOException("Failed to archive/delete all the files for region:"
           + Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family)
           + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
@@ -228,7 +236,7 @@
     Collection<File> storeFiles = Collections2.transform(compactedFiles, getStorePath);
 
     // do the actual archive
-    if (!resolveAndArchive(fs, storeArchiveDir, storeFiles)) {
+    if (!resolveAndArchive(conf, fs, storeArchiveDir, storeFiles)) {
       throw new IOException("Failed to archive/delete all the files for region:"
           + Bytes.toString(regionInfo.getRegionName()) + ", family:" + Bytes.toString(family)
           + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
@@ -257,7 +265,7 @@
     // do the actual archive
     long start = EnvironmentEdgeManager.currentTimeMillis();
     File file = new FileablePath(fs, storeFile);
-    if (!resolveAndArchiveFile(storeArchiveDir, file, Long.toString(start))) {
+    if (!resolveAndArchiveFile(conf, storeArchiveDir, file, Long.toString(start))) {
       throw new IOException("Failed to archive/delete the file for region:"
           + regionInfo.getRegionNameAsString() + ", family:" + Bytes.toString(family)
           + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
@@ -277,11 +285,11 @@
    * @return true on success, false otherwise
    * @throws IOException on unexpected failure
    */
-  private static boolean resolveAndArchive(FileSystem fs, Path baseArchiveDir,
+  private static boolean resolveAndArchive(Configuration conf, FileSystem fs, Path baseArchiveDir,
       Collection<File> toArchive) throws IOException {
     if (LOG.isTraceEnabled()) LOG.trace("Starting to archive " + toArchive);
     long start = EnvironmentEdgeManager.currentTimeMillis();
-    List<File> failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start);
+    List<File> failures = resolveAndArchive(conf, fs, baseArchiveDir, toArchive, start);
 
     // notify that some files were not archived.
     // We can't delete the files otherwise snapshots or other backup system
@@ -308,7 +316,7 @@
    * @return the list of failed to archive files.
    * @throws IOException if an unexpected file operation exception occured
    */
-  private static List<File> resolveAndArchive(FileSystem fs, Path baseArchiveDir,
+  private static List<File> resolveAndArchive(Configuration conf, FileSystem fs, Path baseArchiveDir,
       Collection<File> toArchive, long start) throws IOException {
     // short circuit if no files to move
     if (toArchive.size() == 0) return Collections.emptyList();
@@ -332,7 +340,7 @@
       if (LOG.isTraceEnabled()) LOG.trace("Archiving: " + file);
       if (file.isFile()) {
         // attempt to archive the file
-        if (!resolveAndArchiveFile(baseArchiveDir, file, startTime)) {
+        if (!resolveAndArchiveFile(conf, baseArchiveDir, file, startTime)) {
           LOG.warn("Couldn't archive " + file + " into backup directory: " + baseArchiveDir);
           failures.add(file);
         }
@@ -344,7 +352,7 @@
         // and then get all the files from that directory and attempt to
         // archive those too
         Collection<File> children = file.getChildren();
-        failures.addAll(resolveAndArchive(fs, parentArchiveDir, children, start));
+        failures.addAll(resolveAndArchive(conf, fs, parentArchiveDir, children, start));
       }
     } catch (IOException e) {
       LOG.warn("Failed to archive " + file, e);
@@ -366,21 +374,38 @@
    *           problem, but the operation still completed.
    * @throws IOException on failure to complete {@link FileSystem} operations.
    */
-  private static boolean resolveAndArchiveFile(Path archiveDir, File currentFile,
-      String archiveStartTime) throws IOException {
-    // build path as it should be in the archive
-    String filename = currentFile.getName();
-    Path archiveFile = new Path(archiveDir, filename);
-    FileSystem fs = currentFile.getFileSystem();
-
-    // if the file already exists in the archive, move that one to a timestamped backup. This is a
-    // really, really unlikely situtation, where we get the same name for the existing file, but
-    // is included just for that 1 in trillion chance.
-    if (fs.exists(archiveFile)) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("File:" + archiveFile + " already exists in archive, moving to "
-            + "timestamped backup and overwriting current.");
+  private static boolean resolveAndArchiveFile(Configuration conf, Path archiveDir, File currentFile,
+      String archiveStartTime) throws IOException {
+    FileSystem fs = currentFile.getFileSystem();
+    FileStatus currentStatus = fs.getFileStatus(currentFile.getPath());
+
+    if (conf.getBoolean(HFILE_SKIP_ARCHIVE_CONF, false)) {
+      initCleaners(conf);
+      if (snapshotCleaner.isFileDeletable(currentStatus) && fileLinkCleaner.isFileDeletable(currentStatus)) {
+        LOG.debug("Attempting to delete file without archiving: " + currentFile.getPath().getName());
+        // Attempt to delete the file; if that succeeds we are done, otherwise fall through and archive it
+        if (fs.delete(currentFile.getPath(), false)) {
+          return true;
+        } else {
+          LOG.debug("Attempt to delete file " + currentFile.getPath().getName()
+              + " failed. Moving it to the archive.");
+        }
+      }
+    }
+
+    // build path as it should be in the archive
+    String filename = currentFile.getName();
+    Path archiveFile = new Path(archiveDir, filename);
+
+    // if the file already exists in the archive, move that one to a timestamped backup. This is a
+    // really, really unlikely situation, where we get the same name for the existing file, but
+    // is included just for that 1 in trillion chance.
+    if (fs.exists(archiveFile)) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("File:" + archiveFile + " already exists in archive, moving to "
+            + "timestamped backup and overwriting current.");
       }
 
       // move the archive file to the stamped backup
       Path backedupArchiveFile = new Path(archiveDir, filename + SEPARATOR + archiveStartTime);
@@ -441,6 +466,21 @@
   }
 
   /**
+   * Utility method to lazily instantiate the cleaner delegates if they are null.
+   */
+  private static void initCleaners(Configuration conf) {
+    if (snapshotCleaner == null) {
+      snapshotCleaner = new SnapshotHFileCleaner();
+      snapshotCleaner.setConf(conf);
+    }
+    if (fileLinkCleaner == null) {
+      fileLinkCleaner = new HFileLinkCleaner();
+      fileLinkCleaner.setConf(conf);
+    }
+  }
+
+  /**
    * Without regard for backup, delete a region. Should be used with caution.
    * @param regionDir {@link Path} to the region to be deleted.
    * @param fs FileSystem from which to delete the region
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index 26da01e..5b70882 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -501,7 +501,7 @@
     // if not the cleaner will take care of them.
     for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) {
       for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) {
-        HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir);
+        HFileArchiver.archiveRegion(c, fs, this.rootdir, tabledir, regiondir);
       }
     }
     if (!fs.delete(tmpdir, true)) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
index 31316eb..d923402 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
@@ -102,7 +102,7 @@
     FileSystem fs = mfs.getFileSystem();
     for (HRegionInfo hri: regions) {
       LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
-      HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
+      HFileArchiver.archiveRegion(server.getConfiguration(), fs, mfs.getRootDir(),
         tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
     }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index 2929f47..d98e983 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -887,7 +887,7 @@
     // Archive region
     Path rootDir = FSUtils.getRootDir(conf);
-    HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
+    HFileArchiver.archiveRegion(conf, fs, rootDir, tableDir, regionDir);
 
     // Delete empty region dir
     if (!fs.delete(regionDir, true)) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java
index b5cfa9a..8b4e4a1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java
@@ -372,7 +372,7 @@
 
     try {
       // Try to archive the file
-      HFileArchiver.archiveRegion(fs, rootDir,
+      HFileArchiver.archiveRegion(UTIL.getConfiguration(), fs, rootDir,
         sourceRegionDir.getParent(), sourceRegionDir);
 
       // The archiver succeded, the file is no longer in the original location
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestSkipArchiveTableDeleteHandler.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestSkipArchiveTableDeleteHandler.java
new file mode 100644
index 0000000..87531e9
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestSkipArchiveTableDeleteHandler.java
@@ -0,0 +1,184 @@
+package org.apache.hadoop.hbase.master.handler;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestSkipArchiveTableDeleteHandler {
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final TableName TABLENAME1 =
+      TableName.valueOf("test_table_skiparchive1");
+  private static final TableName TABLENAME2 =
+      TableName.valueOf("test_table_skiparchive2");
+  private static final TableName TABLENAME3 =
+      TableName.valueOf("test_table_skiparchive3");
+  private static final byte[][] FAMILIES = new byte[][] { Bytes.toBytes("cf1"),
+      Bytes.toBytes("cf2"), Bytes.toBytes("cf3") };
+  private static HTable t1, t2, t3;
+
+  @BeforeClass
+  public static void beforeAllTests() throws Exception {
+    // Set the skip-archive flag in the configuration used by the mini cluster
+    TEST_UTIL.getConfiguration().setBoolean("hbase.hfile.skip.archive", true);
+    TEST_UTIL.startMiniCluster();
+
+    // Create tables with three column families. This will assign regions.
+    TEST_UTIL.createTable(TABLENAME1, FAMILIES);
+    TEST_UTIL.createTable(TABLENAME2, FAMILIES);
+    TEST_UTIL.createTable(TABLENAME3, FAMILIES);
+
+    // Build the HTable objects for the 3 tables
+    t1 = new HTable(TEST_UTIL.getConfiguration(), TABLENAME1);
+    t2 = new HTable(TEST_UTIL.getConfiguration(), TABLENAME2);
+    t3 = new HTable(TEST_UTIL.getConfiguration(), TABLENAME3);
+
+    // Wait until the regions of all three tables are out of transition
+    while (TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
+        .getRegionStates().getRegionsInTransition().size() > 0) {
+      Thread.sleep(100);
+    }
+    // Load every family of each table with data and flush it to hfiles
+    TEST_UTIL.loadTable(t1, FAMILIES);
+    TEST_UTIL.loadTable(t2, FAMILIES);
+    TEST_UTIL.loadTable(t3, FAMILIES);
+    TEST_UTIL.flush();
+  }
+
+  @AfterClass
+  public static void afterAllTests() throws Exception {
+    // Close the tables before tearing down the cluster
+    t1.close();
+    t2.close();
+    t3.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setup() throws IOException, InterruptedException {
+    TEST_UTIL.enableDebug(org.apache.hadoop.hbase.master.handler.DeleteTableHandler.class);
+  }
+
+  /**
+   * A helper method to get the count of rows in a test table.
+   * @return row count of the given table
+   * @throws IOException
+   */
+  private int getTableRowCount(HTable t) throws IOException {
+    Scan scan = new Scan();
+    ResultScanner rs = t.getScanner(scan);
+    int rowCount = 0;
+    try {
+      for (Result r = rs.next(); r != null; r = rs.next()) {
+        rowCount++;
+      }
+    } finally {
+      rs.close();
+    }
+    return rowCount;
+  }
+
+  @Test
+  public void deleteTestWithSnapshot() throws Exception {
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    int beforeCount, afterCount, restoreCount;
+    // Take a snapshot of the table
+    byte[] snapshot = Bytes.toBytes(TABLENAME1.toString() + ".snapshot");
+    admin.snapshot(snapshot, Bytes.toBytes(TABLENAME1.toString()));
+    beforeCount = getTableRowCount(t1);
+    // Delete a few rows from the table; for this test we delete all the rows starting with 'a'.
+    t1.setAutoFlush(false);
+    for (byte[] row : HBaseTestingUtility.ROWS) {
+      if (row[0] == 'a') {
+        Delete delete = new Delete(row);
+        t1.delete(delete);
+      } else {
+        break;
+      }
+    }
+    t1.flushCommits();
+    afterCount = getTableRowCount(t1);
+    assertFalse(beforeCount == afterCount);
+    // Delete the table. With skip-archive set, the extra hfiles created by the deletes are
+    // removed outright instead of being archived, while files the snapshot still needs survive.
+    admin.disableTable(TABLENAME1);
+    admin.deleteTable(TABLENAME1);
+    // Restore the snapshot
+    admin.restoreSnapshot(snapshot);
+    // Count the rows in the restored table
+    restoreCount = getTableRowCount(t1);
+    assertTrue(beforeCount == restoreCount);
+  }
+
+  @Test
+  public void deleteTestNormalTable() throws Exception {
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    // Delete the table from HBase
+    admin.disableTable(TABLENAME2);
+    admin.deleteTable(TABLENAME2);
+
+    // Make sure the table is deleted properly
+    TableName[] tables = admin.listTableNames();
+    for (TableName table : tables) {
+      if (table.getNameAsString().equals(TABLENAME2.getNameAsString())) {
+        fail("Table " + TABLENAME2 + " still exists after delete");
+      }
+    }
+  }
+
+  @Test
+  public void deleteTestWithHFileLinks() throws Exception {
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    byte[] snapshot = Bytes.toBytes(TABLENAME3.toString() + ".snapshot");
+    int beforeCount, afterCount, cloneCount;
+    admin.snapshot(snapshot, Bytes.toBytes(TABLENAME3.toString()));
+    beforeCount = getTableRowCount(t3);
+    // Clone the snapshot to a new table; the clone holds HFileLinks back to the original table.
+    admin.cloneSnapshot(snapshot, Bytes.toBytes(TABLENAME3.toString() + ".clone"));
+    // Delete the snapshot
+    admin.deleteSnapshot(snapshot);
+    // Delete a few rows from the original table; for this test we delete all the rows starting with 'a'.
+    t3.setAutoFlush(false);
+    for (byte[] row : HBaseTestingUtility.ROWS) {
+      if (row[0] == 'a') {
+        Delete delete = new Delete(row);
+        t3.delete(delete);
+      } else {
+        break;
+      }
+    }
+    t3.flushCommits();
+    afterCount = getTableRowCount(t3);
+    assertFalse(beforeCount == afterCount);
+    // Delete the original table; the hfiles made obsolete by the deletes can be removed, but
+    // anything the clone still links to must not be.
+    admin.disableTable(TABLENAME3);
+    admin.deleteTable(TABLENAME3);
+    // Count the number of rows in the clone after the delete
+    HTable clone = new HTable(TEST_UTIL.getConfiguration(), Bytes.toBytes(TABLENAME3.toString() + ".clone"));
+    try {
+      cloneCount = getTableRowCount(clone);
+    } finally {
+      clone.close();
+    }
+    assertEquals(beforeCount, cloneCount);
+  }
+}
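
Usage sketch (not part of the patch above): a minimal illustration of how the new
"hbase.hfile.skip.archive" flag and the widened
archiveRegion(Configuration, FileSystem, Path, Path, Path) entry point could be exercised
from caller code. The table name and encoded region directory are placeholder values, and
the snippet assumes it runs against a deployment that includes the patched HFileArchiver.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.util.FSUtils;

public class SkipArchiveSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // With the flag on, HFileArchiver deletes files that neither a snapshot nor an HFileLink
    // still references; files that are still referenced fall back to normal archiving.
    conf.setBoolean("hbase.hfile.skip.archive", true);

    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    // "demo_table" and the encoded region name below are hypothetical, for illustration only.
    Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf("demo_table"));
    Path regionDir = new Path(tableDir, "0123456789abcdef0123456789abcdef");

    // Same call shape the patch introduces in MasterFileSystem, DeleteTableHandler and
    // HRegionFileSystem: the Configuration now travels down to resolveAndArchiveFile.
    boolean cleaned = HFileArchiver.archiveRegion(conf, fs, rootDir, tableDir, regionDir);
    System.out.println("Region cleanup " + (cleaned ? "succeeded" : "failed"));
  }
}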