Index: src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (revision 0) @@ -0,0 +1,178 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.Test; + +public class TestCatalogJanitor { + + /** + * Pseudo server for below tests: getters return null, isStopped is always false. + */ + class MockServer implements Server { + @Override + public CatalogTracker getCatalogTracker() { + // TODO Auto-generated method stub + return null; + } + + @Override + public Configuration getConfiguration() { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getServerName() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + // TODO Auto-generated method stub + return null; + } + + @Override + public void abort(String why, Throwable e) { + // TODO Auto-generated method stub + } + + @Override + public boolean isStopped() { + // TODO Auto-generated method stub + return false; + } + + @Override + public void stop(String why) { + // TODO Auto-generated method stub + } + + } + + /** + * Mock MasterServices for tests below: every getter returns null. 
+ */ + class MockMasterServices implements MasterServices { + @Override + public void checkTableModifiable(byte[] tableName) throws IOException { + // TODO Auto-generated method stub + } + + @Override + public AssignmentManager getAssignmentManager() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ExecutorService getExecutorService() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MasterFileSystem getMasterFileSystem() { + // TODO Auto-generated method stub + return null; + } + + @Override + public ServerManager getServerManager() { + // TODO Auto-generated method stub + return null; + } + + } + + @Test + public void testGetHRegionInfo() throws IOException { + assertNull(CatalogJanitor.getHRegionInfo(new Result())); + List<KeyValue> kvs = new ArrayList<KeyValue>(); + Result r = new Result(kvs); + assertNull(CatalogJanitor.getHRegionInfo(r)); + byte [] f = HConstants.CATALOG_FAMILY; + // Make a key value that doesn't have the expected qualifier. + kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, f, + HConstants.SERVER_QUALIFIER, f)); + r = new Result(kvs); + assertNull(CatalogJanitor.getHRegionInfo(r)); + // Make a key that does not have a regioninfo value. 
+ kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, f, + HConstants.REGIONINFO_QUALIFIER, f)); + boolean exception = false; + try { + CatalogJanitor.getHRegionInfo(new Result(kvs)); + } catch (Exception ioe) { + exception = true; + } + assertTrue(exception); + // OK, give it what it expects + kvs.clear(); + kvs.add(new KeyValue(HConstants.EMPTY_BYTE_ARRAY, f, + HConstants.REGIONINFO_QUALIFIER, + Writables.getBytes(HRegionInfo.FIRST_META_REGIONINFO))); + HRegionInfo hri = CatalogJanitor.getHRegionInfo(new Result(kvs)); + assertNotNull(hri); + assertEquals(HRegionInfo.FIRST_META_REGIONINFO, hri); + } + + @Test + public void testCleanParent() throws IOException { + CatalogJanitor janitor = new CatalogJanitor(new MockServer(), + new MockMasterServices(), 1000); + // Create regions. + HTableDescriptor htd = new HTableDescriptor("table"); + htd.addFamily(new HColumnDescriptor("family")); + HRegionInfo parent = + new HRegionInfo(htd, Bytes.toBytes("aaa"), Bytes.toBytes("eee")); + HRegionInfo splita = + new HRegionInfo(htd, Bytes.toBytes("aaa"), Bytes.toBytes("ccc")); + HRegionInfo splitb = + new HRegionInfo(htd, Bytes.toBytes("ccc"), Bytes.toBytes("eee")); + // Test that when both daughter regions are in place, that we do not + // remove the parent. + // TODO: Cover the both-daughters-in-place case. NOTE: MockMasterServices returns a null MasterFileSystem, which cleanParent dereferences on its delete path. + // An empty result means that when we go to look for SPLITA or SPLITB, + // they'll not be found so we'll go on to delete the parent. + janitor.cleanParent(parent, new Result()); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 999236) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -2474,22 +2474,6 @@ } /** - * Delete a region's meta information from the passed - * meta region. Deletes the row. 
- * @param srvr META server to be updated - * @param metaRegionName Meta region name - * @param regionName HRegion to remove from meta - * - * @throws IOException - */ - public static void removeRegionFromMETA(final HRegionInterface srvr, - final byte [] metaRegionName, final byte [] regionName) - throws IOException { - Delete delete = new Delete(regionName); - srvr.delete(metaRegionName, delete); - } - - /** * Utility method used by HMaster marking regions offlined. * @param srvr META server to be updated * @param metaRegionName Meta region name Index: src/main/java/org/apache/hadoop/hbase/HRegionInfo.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HRegionInfo.java (revision 999236) +++ src/main/java/org/apache/hadoop/hbase/HRegionInfo.java (working copy) @@ -529,7 +529,19 @@ this.offLine = offLine; } + /** + * @return True if this region is split, i.e. a split parent; a warning is logged when such a region is not also offline. + */ + public boolean isSplitParent() { + if (!isSplit()) return false; + if (!isOffline()) { + LOG.warn("Region is split but NOT offline: " + getRegionNameAsString()); + } + return true; + } + + /** * @see java.lang.Object#toString() */ @Override Index: src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (revision 0) @@ -0,0 +1,264 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import java.io.IOException; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.Chore; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.Store; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Writables; + +/** + * A janitor for the catalog tables. Scans the .META. catalog + * table on a period looking for unused regions to garbage collect. 
+ */ +class CatalogJanitor extends Chore { + private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName()); + private final Server server; + private final MasterServices services; + + CatalogJanitor(final Server server, final MasterServices services, + final int period) { + super(server.getServerName() + "-CatalogJanitor", period, server); + this.server = server; + this.services = services; + } + + @Override + protected boolean initialChore() { + try { + scan(); + } catch (IOException e) { + LOG.warn("Failed initial scan of catalog table", e); + return false; + } + return true; + } + + @Override + protected void chore() { + try { + scan(); + } catch (IOException e) { + LOG.warn("Failed scan of catalog table", e); + } + } + + /** + * Run janitorial scan of catalog .META. table looking for + * garbage to collect. + * @throws IOException If the catalog scan or the cleanup of a parent fails + */ + void scan() throws IOException { + // TODO: Only works with single .META. region currently. Fix. + final AtomicInteger count = new AtomicInteger(0); + // Keep Map of found split parents. These are candidates for cleanup. + final Map<HRegionInfo, Result> splitParents = + new TreeMap<HRegionInfo, Result>(); + // This visitor collects split parents and counts rows in the .META. table + MetaReader.Visitor visitor = new MetaReader.Visitor() { + @Override + public boolean visit(Result r) throws IOException { + if (r == null || r.isEmpty()) return true; + count.incrementAndGet(); + HRegionInfo info = getHRegionInfo(r); // Null when row has no REGIONINFO_QUALIFIER value + if (info != null && info.isSplitParent()) splitParents.put(info, r); + // Returning true means "keep scanning" + return true; + } + }; + // Run full scan of .META. catalog table passing in our custom visitor + MetaReader.fullScan(this.server.getCatalogTracker(), visitor); + // Now work on our list of found parents. See if any we can clean up. 
+ int cleaned = 0; + for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) { + if (cleanParent(e.getKey(), e.getValue())) cleaned++; + } + LOG.info("Scanned " + count.get() + " catalog row(s) and gc'd " + cleaned + + " unreferenced parent region(s)"); + } + + /** + * Get HRegionInfo from passed catalog row. + * @param result Catalog row to do lookup in. + * @return Null if not found (and logs fact that expected REGIONINFO_QUALIFIER + * was missing) else deserialized {@link HRegionInfo} + * @throws IOException + */ + static HRegionInfo getHRegionInfo(final Result result) + throws IOException { + byte [] bytes = + result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); + if (bytes == null) { + LOG.warn("REGIONINFO_QUALIFIER is empty in " + result); + return null; + } + return Writables.getHRegionInfo(bytes); + } + + /** + * If daughters no longer hold reference to the parents, delete the parent. + * The region is deleted from the filesystem first, then from the catalog table. + * @param parent HRegionInfo of split offlined parent + * @param rowContent Content of parent row in + * the catalog table + * @return True if we removed parent from meta table and from + * the filesystem. + * @throws IOException + */ + boolean cleanParent(final HRegionInfo parent, + Result rowContent) + throws IOException { + boolean result = false; + // Run checks on each daughter split. 
+ boolean hasReferencesA = + checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER); + boolean hasReferencesB = + checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER); + if (!hasReferencesA && !hasReferencesB) { + LOG.info("Deleting region " + parent.getRegionNameAsString() + + " because daughter splits no longer hold references"); + FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); + Path rootdir = this.services.getMasterFileSystem().getRootDir(); + HRegion.deleteRegion(fs, rootdir, parent); + MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent); + result = true; + } + return result; + } + + + /** + * See if the passed daughter has references in the filesystem to the parent + * and if not, remove the note of daughter region in the parent row: its + * column info:splitA or info:splitB. + * @param parent Split parent region + * @param rowContent Content of the parent row; the daughter is read from it + * @param qualifier Column the daughter is recorded under in the parent row + * @return True if this daughter still has references to the parent. + * @throws IOException + */ + boolean checkDaughter(final HRegionInfo parent, + final Result rowContent, final byte [] qualifier) + throws IOException { + HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier); + return hasReferences(parent, rowContent, hri, qualifier); + } + + /** + * Get daughter HRegionInfo out of parent info:splitA/info:splitB columns. + * @param result Content of the parent row + * @param which Whether "info:splitA" or "info:splitB" column + * @return Deserialized content of the info:splitA or info:splitB as a + * HRegionInfo + * @throws IOException + */ + private HRegionInfo getDaughterRegionInfo(final Result result, + final byte [] which) + throws IOException { + byte [] bytes = result.getValue(HConstants.CATALOG_FAMILY, which); + return Writables.getHRegionInfoOrNull(bytes); + } + + /** + * Remove mention of daughter from the + * parent row. 
+ * Delegates to + * {@link MetaEditor#deleteDaughterReferenceInParent}. + * @param parent Split parent whose row is edited + * @param split Daughter being dropped from the parent row + * @param qualifier Column the daughter is recorded under + * @throws IOException + */ + private void removeDaughterFromParent(final HRegionInfo parent, + final HRegionInfo split, final byte [] qualifier) + throws IOException { + MetaEditor.deleteDaughterReferenceInParent(this.server.getCatalogTracker(), + parent, qualifier, split); + } + + /** + * Checks if a daughter region -- either splitA or splitB -- still holds + * references to parent. If not, removes reference to the split from + * the parent meta region row so we don't check it any more. + * @param parent Parent region. + * @param rowContent Keyed content of the parent row in meta region. + * @param split Daughter region to check; when null, false is returned and nothing is removed. + * @param qualifier Which of the daughters to look at, splitA or splitB. + * @return True if still has references to parent. + * @throws IOException + */ + boolean hasReferences(final HRegionInfo parent, + final Result rowContent, final HRegionInfo split, + final byte [] qualifier) + throws IOException { + boolean result = false; + if (split == null) return result; + FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); + Path rootdir = this.services.getMasterFileSystem().getRootDir(); + Path tabledir = new Path(rootdir, split.getTableDesc().getNameAsString()); + for (HColumnDescriptor family: split.getTableDesc().getFamilies()) { + Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(), + family.getName()); + // Look for reference files. Call listStatus with anonymous instance of PathFilter. 
+ FileStatus [] ps = fs.listStatus(p, + new PathFilter () { + public boolean accept(Path path) { + return StoreFile.isReference(path); + } + } + ); + + if (ps != null && ps.length > 0) { + result = true; + break; + } + } + if (!result) { + removeDaughterFromParent(parent, split, qualifier); + } + return result; + } + + /** + * Interrupt thread regardless of what it's doing + */ + public void interruptAndStop() { + if (isAlive()) super.interrupt(); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (revision 999236) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (working copy) @@ -126,10 +126,36 @@ */ public static Map fullScan(CatalogTracker catalogTracker) throws IOException { + final Map regions = + new TreeMap(); + Visitor v = new Visitor() { + @Override + public boolean visit(Result r) throws IOException { + if (r == null || r.isEmpty()) return true; + Pair region = metaRowToRegionPair(r); + regions.put(region.getFirst(), region.getSecond()); + return true; + } + }; + fullScan(catalogTracker, v); + return regions; + } + + /** + * Performs a full scan of .META.. + *

+ * Each non-empty row is handed to the passed visitor; nothing is collected + * or returned here. Scanning stops as soon as the visitor returns false + * and the scanner is always closed. + * @param catalogTracker Used to obtain the connection to the meta server + * @param visitor Called with each non-empty row; return false to stop the scan + * @throws IOException + */ + public static void fullScan(CatalogTracker catalogTracker, + final Visitor visitor) + throws IOException { HRegionInterface metaServer = catalogTracker.waitForMetaServerConnectionDefault(); - Map allRegions = - new TreeMap(); Scan scan = new Scan(); scan.addFamily(HConstants.CATALOG_FAMILY); long scannerid = metaServer.openScanner( @@ -137,16 +163,12 @@ try { Result data; while((data = metaServer.next(scannerid)) != null) { - if (!data.isEmpty()) { - Pair region = - metaRowToRegionPair(data); - allRegions.put(region.getFirst(), region.getSecond()); - } + if (!data.isEmpty() && !visitor.visit(data)) break; } } finally { metaServer.close(scannerid); } - return allRegions; + return; } /** @@ -419,4 +441,17 @@ metaServer.close(scannerid); } } -} + + /** + * Implementations 'visit' a catalog table row. + */ + public interface Visitor { + /** + * Visit the catalog table row. + * @param r A row from catalog table + * @return True if we are to proceed scanning the table, else false if + * we are to stop now. + */ + public boolean visit(final Result r) throws IOException; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (revision 999236) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (working copy) @@ -182,13 +182,34 @@ HRegionInfo regionInfo) throws IOException { Delete delete = new Delete(regionInfo.getRegionName()); - catalogTracker.waitForMetaServerConnectionDefault().delete( - CatalogTracker.META_REGION, delete); - + catalogTracker.waitForMetaServerConnectionDefault(). 
+ delete(CatalogTracker.META_REGION, delete); LOG.info("Deleted region " + regionInfo.getRegionNameAsString() + " from META"); } /** + * Deletes daughter reference in offlined split parent. + * @param catalogTracker Used to get the meta server connection the delete is sent over + * @param parent Parent row we're to remove daughter reference from + * @param qualifier SplitA or SplitB daughter column to remove from the catalog family + * @param daughter Daughter region; only used in the log message + * @throws NotAllMetaRegionsOnlineException + * @throws IOException + */ + public static void deleteDaughterReferenceInParent(CatalogTracker catalogTracker, + final HRegionInfo parent, final byte [] qualifier, + final HRegionInfo daughter) + throws NotAllMetaRegionsOnlineException, IOException { + Delete delete = new Delete(parent.getRegionName()); + delete.deleteColumns(HConstants.CATALOG_FAMILY, qualifier); + catalogTracker.waitForMetaServerConnectionDefault(). + delete(CatalogTracker.META_REGION, delete); + LOG.info("Deleted daughter " + daughter.getRegionNameAsString() + + " reference " + Bytes.toString(qualifier) + " from " + + parent.getRegionNameAsString() + " .META."); + } + + /** * Updates the region information for the specified region in META. * @param catalogTracker * @param regionInfo region to be updated in META