diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
index 5282dc2..7e24a29 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java
@@ -268,7 +269,59 @@ public abstract class CleanerChore<T extends FileCleanerDelegate> extends Chore
     return deletedFileCount == files.size();
   }
-
+
+  /**
+   * Helper that recursively visits each directory and collects every file that passes
+   * {@link #validate(Path)}, flattening the listing for the cleaner delegates.
+   * @param files array of files and directories to search recursively
+   * @return list of valid FileStatus objects found under the given input files, or null if the
+   *         input array is null
+   * @throws IOException if listing the filesystem fails
+   */
+  private List<FileStatus> getValidFiles(FileStatus[] files) throws IOException {
+    if (files == null) {
+      return null;
+    }
+    List<FileStatus> deleteFiles = Lists.newArrayList();
+    List<FileStatus> childDeletables;
+    for (FileStatus file : files) {
+      Path path = file.getPath();
+      if (file.isDir()) {
+        // Recurse into sub-directories and collect their valid files as well
+        childDeletables = getValidFiles(FSUtils.listStatus(this.fs, path));
+        if (childDeletables != null) {
+          deleteFiles.addAll(childDeletables);
+        }
+      } else if (validate(path)) {
+        deleteFiles.add(file);
+      }
+    }
+    return deleteFiles;
+  }
+
+  /**
+   * Returns the subset of a file listing that can be deleted after applying the provided
+   * cleaners.
+   * @param files files/directories to search recursively
+   * @param cleaners cleaner delegates to apply to the listing. Each entry is a class name passed
+   *          as a string; this method builds and loads the corresponding cleaner class.
+   * @return list of FileStatus objects that can be deleted from the given input files, or null
+   *         if either argument is null
+   * @throws IOException if listing the filesystem fails
+   */
+  public List<FileStatus> getDeletableFiles(FileStatus[] files, List<String> cleaners)
+      throws IOException {
+    if (cleaners == null) {
+      return null;
+    }
+    List<FileStatus> validFiles = getValidFiles(files);
+    if (validFiles == null) {
+      return null;
+    }
+    Iterable<FileStatus> deletableValidFiles = validFiles;
+    for (String cleaner : cleaners) {
+      T cleanerInstance = newFileCleaner(cleaner, conf);
+      if (!(cleanerInstance.isStopped() || this.stopper.isStopped())) {
+        deletableValidFiles = cleanerInstance.getDeletableFiles(deletableValidFiles);
+      }
+    }
+    return Lists.newArrayList(deletableValidFiles);
+  }
+
   @Override
   public void cleanup() {
     for (T lc : this.cleanersChain) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
index 31316eb..0855e76 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
@@ -24,6 +24,7 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.TableName;
@@ -39,9 +40,14 @@ import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.master.RegionStates;
 import org.apache.hadoop.hbase.master.RegionState.State;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.zookeeper.KeeperException;
 
+import com.google.common.collect.Lists;
+
 @InterfaceAudience.Private
 public class DeleteTableHandler extends TableEventHandler {
   private static final Log LOG = LogFactory.getLog(DeleteTableHandler.class);
@@ -100,17 +107,35 @@ public class DeleteTableHandler extends TableEventHandler {
     try {
       // 4. Delete regions from FS (temp directory)
       FileSystem fs = mfs.getFileSystem();
+      boolean skipArchive = server.getConfiguration().getBoolean(
+        "hbase.table.delete.skiparchive", false);
+      if (skipArchive) {
+        LOG.debug("Skiparchive enabled. Attempting to delete unneeded store files of table "
+          + tableName);
+        HFileCleaner hfc = ((HMaster) server).getHFileCleaner();
+        FileStatus[] files = FSUtils.listStatus(fs, tempTableDir);
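+        // Run the listing through the same delegates the HFileCleaner chore uses, so store
+        // files still referenced by a snapshot or by HFileLinks from a cloned table are kept
+        // for the normal archiving path instead of being deleted here.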
+        List<String> cleanerList = Lists.newArrayList();
+        cleanerList.add(org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner.class.getName());
+        cleanerList.add(org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner.class.getName());
+        List<FileStatus> validDeletes = hfc.getDeletableFiles(files, cleanerList);
+        if (validDeletes != null) {
+          for (FileStatus f : validDeletes) {
+            String message = fs.delete(f.getPath(), false) ? "succeeded" : "failed";
+            LOG.debug("Attempt to delete file " + f.getPath() + " " + message);
+          }
+        }
+      }
+      // If skiparchive was enabled, the unneeded hfiles have already been deleted above, so we
+      // just archive the regions and the HFileCleaner takes care of removing the resulting
+      // empty region directories automatically.
       for (HRegionInfo hri: regions) {
         LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
         HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
           tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
       }
 
-      // 5. Delete table from FS (temp directory)
       if (!fs.delete(tempTableDir, true)) {
         LOG.error("Couldn't delete " + tempTableDir);
       }
 
-      LOG.debug("Table '" + tableName + "' archived!");
     } finally {
       // 6. Update table descriptor cache
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestSkipArchiveTableDeleteHandler.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestSkipArchiveTableDeleteHandler.java
new file mode 100644
index 0000000..753a0e0
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestSkipArchiveTableDeleteHandler.java
@@ -0,0 +1,184 @@
+package org.apache.hadoop.hbase.master.handler;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestSkipArchiveTableDeleteHandler {
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final TableName TABLENAME1 =
+      TableName.valueOf("test_table_skiparchive1");
+  private static final TableName TABLENAME2 =
+      TableName.valueOf("test_table_skiparchive2");
+  private static final TableName TABLENAME3 =
+      TableName.valueOf("test_table_skiparchive3");
+  private static final byte[][] FAMILIES = new byte[][] { Bytes.toBytes("cf1"),
+      Bytes.toBytes("cf2"), Bytes.toBytes("cf3") };
+  private static HTable t1, t2, t3;
+
+  @BeforeClass
+  public static void beforeAllTests() throws Exception {
+    // Set the skiparchive setting in the master conf before the cluster starts
+    TEST_UTIL.getConfiguration().setBoolean("hbase.table.delete.skiparchive", true);
+    TEST_UTIL.startMiniCluster();
+
+    // Create three tables, each with three column families. This assigns their regions.
+    TEST_UTIL.createTable(TABLENAME1, FAMILIES);
+    TEST_UTIL.createTable(TABLENAME2, FAMILIES);
+    TEST_UTIL.createTable(TABLENAME3, FAMILIES);
+
+    // Build the HTable objects for the three tables
+    t1 = new HTable(TEST_UTIL.getConfiguration(), TABLENAME1);
+    t2 = new HTable(TEST_UTIL.getConfiguration(), TABLENAME2);
+    t3 = new HTable(TEST_UTIL.getConfiguration(), TABLENAME3);
+
+    // Wait until all regions have been assigned (nothing left in transition)
+    while (TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
+        .getRegionStates().getRegionsInTransition().size() > 0) {
+      Thread.sleep(100);
+    }
+
+    // Load the tables with data for all families
+    TEST_UTIL.loadTable(t1, FAMILIES);
+    TEST_UTIL.loadTable(t2, FAMILIES);
+    TEST_UTIL.loadTable(t3, FAMILIES);
+    TEST_UTIL.flush();
+  }
+
+  @AfterClass
+  public static void afterAllTests() throws Exception {
+    t1.close();
+    t2.close();
+    t3.close();
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setup() throws IOException, InterruptedException {
+    TEST_UTIL.enableDebug(org.apache.hadoop.hbase.master.handler.DeleteTableHandler.class);
+  }
+
+  /**
+   * Helper that counts the rows in a test table.
+   * @return row count of the given table
+   * @throws IOException if the scan fails
+   */
+  private int getTableRowCount(HTable t) throws IOException {
+    Scan scan = new Scan();
+    ResultScanner rs = t.getScanner(scan);
+    int rowCount = 0;
+    try {
+      for (Result r = rs.next(); r != null; r = rs.next()) {
+        rowCount++;
+      }
+    } finally {
+      rs.close();
+    }
+    return rowCount;
+  }
+
+  @Test
+  public void deleteTestWithSnapshot() throws Exception {
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    int beforeCount, afterCount, restoreCount;
+    // Take a snapshot of the table
+    byte[] snapshot = Bytes.toBytes(TABLENAME1.toString() + ".snapshot");
+    admin.snapshot(snapshot, Bytes.toBytes(TABLENAME1.toString()));
+    beforeCount = getTableRowCount(t1);
+    // Delete a few rows from the table; for this test we delete all the rows starting with 'a'.
+    t1.setAutoFlush(false);
+    for (byte[] row : HBaseTestingUtility.ROWS) {
+      if (row[0] == 'a') {
+        Delete delete = new Delete(row);
+        t1.delete(delete);
+      } else {
+        break;
+      }
+    }
+    t1.flushCommits();
+    afterCount = getTableRowCount(t1);
+    assertFalse(beforeCount == afterCount);
+    // Delete the table; since skiparchive is set, the extra hfiles created by the deletes are
+    // removed rather than archived
+    admin.disableTable(TABLENAME1);
+    admin.deleteTable(TABLENAME1);
+    // Restore the snapshot
+    admin.restoreSnapshot(snapshot);
+    // Count the rows in the restored table
+    restoreCount = getTableRowCount(t1);
+    assertTrue(beforeCount == restoreCount);
+  }
+
+  @Test
+  public void deleteTestNormalTable() throws Exception {
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    // Delete the table from HBase
+    admin.disableTable(TABLENAME2);
+    admin.deleteTable(TABLENAME2);
+
+    // Make sure the table is gone
+    TableName[] tables = admin.listTableNames();
+    for (TableName table : tables) {
+      if (table.getNameAsString().equals(TABLENAME2.getNameAsString())) {
+        fail("Table " + TABLENAME2 + " was not deleted");
+      }
+    }
+  }
+
+  @Test
+  public void deleteTestWithHFileLinks() throws Exception {
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    byte[] snapshot = Bytes.toBytes(TABLENAME3.toString() + ".snapshot");
+    int beforeCount, afterCount, cloneCount;
+    admin.snapshot(snapshot, Bytes.toBytes(TABLENAME3.toString()));
+    beforeCount = getTableRowCount(t3);
+    // Clone the snapshot to a new table; the clone holds HFileLinks to the original table's files
+    admin.cloneSnapshot(snapshot, Bytes.toBytes(TABLENAME3.toString() + ".clone"));
+    // Delete the snapshot
+    admin.deleteSnapshot(snapshot);
+    // Delete a few rows from the original table; for this test we delete all the rows starting with 'a'.
+    t3.setAutoFlush(false);
+    for (byte[] row : HBaseTestingUtility.ROWS) {
+      if (row[0] == 'a') {
+        Delete delete = new Delete(row);
+        t3.delete(delete);
+      } else {
+        break;
+      }
+    }
+    t3.flushCommits();
+    afterCount = getTableRowCount(t3);
+    assertFalse(beforeCount == afterCount);
+    // Delete the original table; this should remove the extra hfiles created by the deletes
+    // while keeping the files still referenced by the clone's HFileLinks
+    admin.disableTable(TABLENAME3);
+    admin.deleteTable(TABLENAME3);
+    // Count the number of rows in the clone after the delete
+    HTable clone = new HTable(TEST_UTIL.getConfiguration(),
+        Bytes.toBytes(TABLENAME3.toString() + ".clone"));
+    try {
+      cloneCount = getTableRowCount(clone);
+    } finally {
+      clone.close();
+    }
+    assertEquals(beforeCount, cloneCount);
+  }
+
+}
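
For reviewers who want to exercise the change by hand, here is a minimal, hypothetical sketch (not part of the patch; the class and method names below are invented for illustration). It only touches the two points the patch introduces: the hbase.table.delete.skiparchive flag read in DeleteTableHandler, and the new CleanerChore#getDeletableFiles(FileStatus[], List<String>) call, and it assumes the caller already has a handle on the master's HFileCleaner, as DeleteTableHandler does above.

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
    import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
    import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner;
    import org.apache.hadoop.hbase.util.FSUtils;

    import com.google.common.collect.Lists;

    // Hypothetical helper, for illustration only.
    public class SkipArchiveDeleteExample {

      /** Opt a configuration in to deleting removable store files instead of archiving them. */
      public static void enableSkipArchive(Configuration conf) {
        conf.setBoolean("hbase.table.delete.skiparchive", true);
      }

      /**
       * Mirrors what DeleteTableHandler does: ask the master's HFileCleaner which files under
       * the temp table directory are safe to delete, i.e. not retained by the snapshot or
       * HFileLink cleaner delegates.
       */
      public static List<FileStatus> removableFiles(HFileCleaner cleaner, FileSystem fs,
          Path tempTableDir) throws IOException {
        FileStatus[] files = FSUtils.listStatus(fs, tempTableDir);
        List<String> delegates = Lists.newArrayList(
            SnapshotHFileCleaner.class.getName(),
            HFileLinkCleaner.class.getName());
        return cleaner.getDeletableFiles(files, delegates);
      }
    }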