From 38957985357152f237a1dbe35f3eeeaae0495a61 Mon Sep 17 00:00:00 2001
From: J Mohamed Zahoor
Date: Sat, 18 Aug 2012 21:36:51 +0530
Subject: [PATCH] HBASE-6564: HDFS space is not reclaimed when a column family is deleted

---
 .../hadoop/hbase/master/MasterFileSystem.java      |  10 +-
 .../master/handler/TableDeleteFamilyHandler.java   |  10 ++
 .../apache/hadoop/hbase/HBaseTestingUtility.java   |  43 +++++-
 .../master/handler/TestTableFamilyHandlers.java    | 147 +++++++++++++++++++++
 4 files changed, 207 insertions(+), 3 deletions(-)
 create mode 100755 hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index d9e0d01..fe97237 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -23,7 +23,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
-import java.util.UUID;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 
@@ -47,7 +46,6 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.backup.HFileArchiver;
 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
 import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
 import org.apache.hadoop.hbase.regionserver.wal.OrphanHLogAfterSplitException;
@@ -461,6 +459,14 @@ public class MasterFileSystem {
     // @see HRegion.checkRegioninfoOnFilesystem()
   }
 
+  public void deleteFamily(HRegionInfo region, byte[] familyName)
+      throws IOException {
+    Path delDir = new Path(rootdir,
+      new Path(region.getTableNameAsString(), new Path(
+          region.getEncodedName(), new Path(Bytes.toString(familyName)))));
+    fs.delete(delDir, true);
+  }
+
   public void stop() {
     if (splitLogManager != null) {
       this.splitLogManager.stop();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/TableDeleteFamilyHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/TableDeleteFamilyHandler.java
index c10b056..317fe70 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/TableDeleteFamilyHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/TableDeleteFamilyHandler.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.util.Bytes;
 
@@ -57,6 +58,15 @@ public class TableDeleteFamilyHandler extends TableEventHandler {
     HTableDescriptor htd =
       this.masterServices.getMasterFileSystem().deleteColumn(tableName, familyName);
     // Update in-memory descriptor cache
+
+
+    // Remove the column family from the file system
+    MasterFileSystem mfs = this.masterServices.getMasterFileSystem();
+    for (HRegionInfo hri : hris) {
+      // Delete the family directory in FS for all the regions one by one
+      mfs.deleteFamily(hri, familyName);
+    }
+
     this.masterServices.getTableDescriptors().add(htd);
     if (cpHost != null) {
       cpHost.postDeleteColumnHandler(this.tableName, this.familyName);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 3705328..e60e95a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -71,9 +71,9 @@ import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.HStore;
 import org.apache.hadoop.hbase.regionserver.InternalScanner;
 import org.apache.hadoop.hbase.regionserver.MultiVersionConsistencyControl;
-import org.apache.hadoop.hbase.regionserver.HStore;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -1021,6 +1021,37 @@ public class HBaseTestingUtility {
     t.flushCommits();
     return rowCount;
   }
+
+  /**
+   * Load table of multiple column families with rows from 'aaa' to 'zzz'.
+   * @param t Table
+   * @param f Array of Families to load
+   * @return Count of rows loaded.
+   * @throws IOException
+   */
+  public int loadTable(final HTable t, final byte[][] f) throws IOException {
+    t.setAutoFlush(false);
+    byte[] k = new byte[3];
+    int rowCount = 0;
+    for (byte b1 = 'a'; b1 <= 'z'; b1++) {
+      for (byte b2 = 'a'; b2 <= 'z'; b2++) {
+        for (byte b3 = 'a'; b3 <= 'z'; b3++) {
+          k[0] = b1;
+          k[1] = b2;
+          k[2] = b3;
+          Put put = new Put(k);
+          for (int i = 0; i < f.length; i++) {
+            put.add(f[i], null, k);
+          }
+          t.put(put);
+          rowCount++;
+        }
+      }
+    }
+    t.flushCommits();
+    return rowCount;
+  }
+
   /**
    * Load region with rows from 'aaa' to 'zzz'.
    * @param r Region
@@ -1155,6 +1186,10 @@ public class HBaseTestingUtility {
     // and end key. Adding the custom regions below adds those blindly,
     // including the new start region from empty to "bbb". lg
     List<byte[]> rows = getMetaTableRows(htd.getName());
+    String regionToDeleteInFS = table
+        .getRegionsInRange(Bytes.toBytes(""), Bytes.toBytes("")).get(0)
+        .getRegionInfo().getEncodedName();
+
     List<HRegionInfo> newRegions = new ArrayList<HRegionInfo>(startKeys.length);
     // add custom ones
     int count = 0;
@@ -1176,6 +1211,12 @@ public class HBaseTestingUtility {
         Bytes.toStringBinary(row));
       meta.delete(new Delete(row));
     }
+    // remove the "old" region from FS
+    Path tableDir = new Path(getDefaultRootDirPath().toString()
+        + System.getProperty("file.separator") + htd.getNameAsString()
+        + System.getProperty("file.separator") + regionToDeleteInFS);
+    getDFSCluster().getFileSystem().delete(tableDir);
+
     // flush cache of regions
     HConnection conn = table.getConnection();
     conn.clearRegionCache();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java
new file mode 100755
index 0000000..dc92ca1
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java
@@ -0,0 +1,147 @@
+package org.apache.hadoop.hbase.master.handler;
+
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.LargeTests;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(LargeTests.class)
+public class TestTableFamilyHandlers {
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final String TABLENAME = "column_family_handlers";
+  private static final byte[][] FAMILIES = new byte[][] { Bytes.toBytes("cf1"),
+      Bytes.toBytes("cf2"), Bytes.toBytes("cf3") };
+
+
+
+
+  /**
+   * Start up a mini cluster and put a small table of empty regions into it.
+   *
+   * @throws Exception
+   */
+
+  @BeforeClass
+  public static void beforeAllTests() throws Exception {
+
+    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
+    TEST_UTIL.startMiniCluster(2);
+
+    // Create a table of three families. This will assign a region.
+    TEST_UTIL.createTable(Bytes.toBytes(TABLENAME), FAMILIES);
+    HTable t = new HTable(TEST_UTIL.getConfiguration(), TABLENAME);
+
+    // Create multiple regions in all the three column families
+    TEST_UTIL.createMultiRegions(t, FAMILIES[0]);
+
+    // Load the table with data for all families
+    TEST_UTIL.loadTable(t, FAMILIES);
+
+    TEST_UTIL.flush();
+
+    t.close();
+  }
+
+  @AfterClass
+  public static void afterAllTests() throws Exception {
+    TEST_UTIL.deleteTable(Bytes.toBytes(TABLENAME));
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setup() throws IOException, InterruptedException {
+    TEST_UTIL.ensureSomeRegionServersAvailable(2);
+  }
+
+  @Test
+  public void deleteColumnFamilyWithMultipleRegions() throws Exception {
+
+    HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+    HTableDescriptor beforehtd = admin.getTableDescriptor(Bytes
+        .toBytes(TABLENAME));
+
+    FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
+
+
+    // 1 - Check if table exists in descriptor
+    assertTrue(admin.isTableAvailable(TABLENAME));
+
+
+
+    // 2 - Check if all three families exists in descriptor
+    assertEquals(3, beforehtd.getColumnFamilies().length);
+    HColumnDescriptor[] families = beforehtd.getColumnFamilies();
+    for (int i = 0; i < families.length; i++) {
+
+      assertTrue(families[i].getNameAsString().equals("cf" + (i + 1)));
+    }
+
+    // 3 - Check if table exists in FS
+    Path tableDir = new Path(TEST_UTIL.getDefaultRootDirPath().toString() + "/"
+        + TABLENAME);
+    assertTrue(fs.exists(tableDir));
+
+
+    // 4 - Check if all the 3 column families exists in FS
+    FileStatus[] fileStatus = fs.listStatus(tableDir);
+    for (int i = 0; i < fileStatus.length; i++) {
+      if (fileStatus[i].isDir() == true) {
+        FileStatus[] cf = fs.listStatus(fileStatus[i].getPath());
+        int k = 1;
+        for (int j = 0; j < cf.length; j++) {
+          if (cf[j].isDir() == true
+              && cf[j].getPath().getName().startsWith(".") == false) {
+            assertTrue(cf[j].getPath().getName().equals("cf" + k));
+            k++;
+          }
+        }
+      }
+    }
+
+
+    // TETS - Disable and delete the column family
+    admin.disableTable(TABLENAME);
+    admin.deleteColumn(TABLENAME, "cf2");
+
+
+    // 5 - Check if only 2 column families exist in the descriptor
+    HTableDescriptor afterhtd = admin.getTableDescriptor(Bytes
+        .toBytes(TABLENAME));
+    assertEquals(2, afterhtd.getColumnFamilies().length);
+    HColumnDescriptor[] newFamilies = afterhtd.getColumnFamilies();
+    assertTrue(newFamilies[0].getNameAsString().equals("cf1"));
+    assertTrue(newFamilies[1].getNameAsString().equals("cf3"));
+
+    // 6 - Check if the second column family is gone from the FS
+    fileStatus = fs.listStatus(tableDir);
+    for (int i = 0; i < fileStatus.length; i++) {
+      if (fileStatus[i].isDir() == true) {
+        FileStatus[] cf = fs.listStatus(fileStatus[i].getPath());
+        for (int j = 0; j < cf.length; j++) {
+          if (cf[j].isDir() == true) {
+            assertFalse(cf[j].getPath().getName().equals("cf2"));
+          }
+        }
+      }
+    }
+  }
+}
-- 
1.7.11.1

From c3988de92f0e3a543b81272cb84d08dfaf031485 Mon Sep 17 00:00:00 2001
From: J Mohamed Zahoor
Date: Sun, 19 Aug 2012 11:45:10 +0530
Subject: [PATCH] HBASE-6564: Review comments incorporated

---
 .../hadoop/hbase/master/MasterFileSystem.java      |  5 ++++-
 .../master/handler/TestTableFamilyHandlers.java    | 24 +++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index fe97237..30cddbd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -464,7 +464,10 @@ public class MasterFileSystem {
     Path delDir = new Path(rootdir,
       new Path(region.getTableNameAsString(), new Path(
           region.getEncodedName(), new Path(Bytes.toString(familyName)))));
-    fs.delete(delDir, true);
+    if (fs.delete(delDir, true) == false) {
+      LOG.fatal("Could not delete family " + Bytes.toString(familyName)
+          + " for region " + region.getTableNameAsString());
+    }
   }
 
   public void stop() {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java
index dc92ca1..78ce1c2 100755
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestTableFamilyHandlers.java
@@ -1,3 +1,22 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.master.handler;
 
 
@@ -118,7 +137,7 @@ public class TestTableFamilyHandlers {
     }
 
 
-    // TETS - Disable and delete the column family
+    // TEST - Disable and delete the column family
     admin.disableTable(TABLENAME);
     admin.deleteColumn(TABLENAME, "cf2");
 
@@ -144,4 +163,7 @@
       }
     }
   }
+
+  @org.junit.Rule
+  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
 }
-- 
1.7.11.1