Index: src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java	(revision 0)
@@ -0,0 +1,368 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.Chore;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerInfo;
+import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.UnknownScannerException;
+import org.apache.hadoop.hbase.catalog.MetaReader;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.ipc.RemoteException;
+
+
+/**
+ * A janitor for the catalog tables.
+ */
+abstract class BaseScanner extends Chore {
+  private static final Log LOG = LogFactory.getLog(BaseScanner.class.getName());
+
+  // Make the 'true' Writable once only.
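+  // Pre-serialized BooleanWritable(true); written into the parent row as the
+  // value of the "daughter row verified present" marker columns
+  // (info:splitA_checked / info:splitB_checked) described in checkDaughter below.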
+  private static byte [] TRUE_WRITABLE_AS_BYTES;
+  static {
+    try {
+      TRUE_WRITABLE_AS_BYTES = Writables.getBytes(new BooleanWritable(true));
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  private final Server server;
+  private final MasterServices services;
+  private boolean initialScan = false;
+
+  // Will use this variable to synchronize and make sure we aren't interrupted
+  // mid-scan.
+  final Object scannerLock = new Object();
+
+  BaseScanner(final Server server, final MasterServices services,
+      final int period) {
+    super(server.getServerName() + "-CatalogJanitor", period, server);
+    this.server = server;
+    this.services = services;
+  }
+
+  /** @return true if initial scan completed successfully */
+  public boolean isInitialScanComplete() {
+    return this.initialScan;
+  }
+
+  @Override
+  protected boolean initialChore() {
+    boolean result = initialScan();
+    this.initialScan = true;
+    return result;
+  }
+
+  boolean initialScan() {
+    // First pass over the catalog table.
+    try {
+      scan();
+    } catch (IOException e) {
+      LOG.warn("Initial catalog scan failed", e);
+      return false;
+    }
+    return true;
+  }
+
+  @Override
+  protected void chore() {
+    maintenanceScan();
+  }
+
+  void maintenanceScan() {
+  }
+
+  void scan() throws IOException {
+    final AtomicInteger count = new AtomicInteger(0);
+    final Map<HRegionInfo, Result> splitParents =
+      new TreeMap<HRegionInfo, Result>();
+    MetaReader.Visitor visitor = new MetaReader.Visitor() {
+      @Override
+      public boolean visit(Result r) throws IOException {
+        if (r == null || r.isEmpty()) return true;
+        count.incrementAndGet();
+        HRegionInfo info = getHRegionInfo(r);
+        if (info == null) return true; // Keep scanning.
+        if (isSplitParent(info)) {
+          splitParents.put(info, r);
+        }
+        return true;
+      }
+    };
+    MetaReader.fullScan(this.server.getCatalogTracker(), visitor);
+    // Now work on our list of parents.  See if any we can clean up.
+    for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
+      HRegionInfo hri = e.getKey();
+      cleanParents(hri, e.getValue());
+    }
+    LOG.info("Scanned " + count.get() + " row(s)");
+  }
+
+  /**
+   * Get HRegionInfo from passed META map of row values.
+   * Returns null if none found (and logs fact that expected COL_REGIONINFO
+   * was missing).
+   * @param result Map to do lookup in.
+   * @return Null or found HRegionInfo.
+   * @throws IOException
+   */
+  private HRegionInfo getHRegionInfo(final Result result)
+  throws IOException {
+    byte [] bytes =
+      result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
+    if (bytes == null) {
+      LOG.warn("REGIONINFO_QUALIFIER is empty in " + result);
+      return null;
+    }
+    return Writables.getHRegionInfo(bytes);
+  }
+
+  /**
+   * @param hri Region to check.
+   * @return True if this is a split parent.
+   */
+  private boolean isSplitParent(final HRegionInfo hri) {
+    if (!hri.isSplit()) return false;
+    if (!hri.isOffline()) {
+      LOG.warn("Region is split but NOT offline: " + hri.getRegionNameAsString());
+    }
+    return true;
+  }
+
+  /*
+   * If daughters no longer hold reference to the parents, delete the parent.
+   * @param parent HRegionInfo of split offlined parent
+   * @param rowContent Content of parent row in the meta table
+   * @return True if we removed parent from meta table and from
+   * the filesystem.
+   * @throws IOException
+   */
+  private boolean cleanParents(final HRegionInfo parent, Result rowContent)
+  throws IOException {
+    boolean result = false;
+    // Interface to the server hosting .META. and the meta region name; this
+    // assumes a single meta region (FIRST_META_REGIONINFO), matching what
+    // MetaReader.fullScan scans.
+    HRegionInterface srvr =
+      this.server.getCatalogTracker().waitForMetaServerConnectionDefault();
+    byte [] metaRegionName = HRegionInfo.FIRST_META_REGIONINFO.getRegionName();
+    // Run checks on each daughter split.
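+    // A split parent names its two daughters in the info:splitA and
+    // info:splitB columns of its catalog row; it can only be cleaned up once
+    // neither daughter still references it in the filesystem.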
+    boolean hasReferencesA = checkDaughter(metaRegionName, srvr,
+      parent, rowContent, HConstants.SPLITA_QUALIFIER);
+    boolean hasReferencesB = checkDaughter(metaRegionName, srvr,
+      parent, rowContent, HConstants.SPLITB_QUALIFIER);
+    if (!hasReferencesA && !hasReferencesB) {
+      LOG.info("Deleting region " + parent.getRegionNameAsString() +
+        " (encoded=" + parent.getEncodedName() +
+        ") because daughter splits no longer hold references");
+      FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
+      Path rootdir = this.services.getMasterFileSystem().getRootDir();
+      HRegion.deleteRegion(fs, rootdir, parent);
+      HRegion.removeRegionFromMETA(srvr, metaRegionName,
+        parent.getRegionName());
+      result = true;
+    }
+    return result;
+  }
+
+
+  /*
+   * See if the passed daughter has references in the filesystem to the parent
+   * and if not, remove the note of daughter region in the parent row: its
+   * column info:splitA or info:splitB.  Also make sure that daughter row is
+   * present in the .META. and mark the parent row when confirmed so we don't
+   * keep checking.  The mark will be info:splitA_checked and its value will be
+   * a true BooleanWritable.
+   * @param metaRegionName
+   * @param srvr Interface to the server hosting the meta region
+   * @param parent
+   * @param rowContent
+   * @param qualifier
+   * @return True if this daughter still has references to the parent.
+   * @throws IOException
+   */
+  private boolean checkDaughter(final byte [] metaRegionName,
+      final HRegionInterface srvr, final HRegionInfo parent,
+      final Result rowContent, final byte [] qualifier)
+  throws IOException {
+    HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier);
+    boolean references = hasReferences(metaRegionName, srvr, parent, rowContent,
+      hri, qualifier);
+    // Return if no references.
+    if (!references) return references;
+    if (!verifyDaughterRowPresent(rowContent, qualifier, srvr, metaRegionName,
+        hri, parent)) {
+      // If we got here, then the parent row does not yet have the
+      // "daughter row verified present" marker present.  Add it.
+      addDaughterRowChecked(metaRegionName, srvr, parent.getRegionName(), hri,
+        qualifier);
+    }
+    return references;
+  }
+
+  /*
+   * Check the daughter of parent is present in meta table.  If not there,
+   * add it.
+   * @param rowContent
+   * @param daughter
+   * @param srvr
+   * @param metaRegionName
+   * @param daughterHRI
+   * @param parent
+   * @throws IOException
+   * @return True, if parent row has marker for "daughter row verified present"
+   * else, false (and will do fixup adding daughter if daughter not present).
+   */
+  private boolean verifyDaughterRowPresent(final Result rowContent,
+      final byte [] daughter, final HRegionInterface srvr,
+      final byte [] metaRegionName,
+      final HRegionInfo daughterHRI, final HRegionInfo parent)
+  throws IOException {
+    // See if the 'checked' column is in parent.  If so, we're done.
+    boolean present = getDaughterRowChecked(rowContent, daughter);
+    if (present) return present;
+    // Parent is not carrying the splitA_checked/splitB_checked so this must
+    // be the first time through here checking splitA/splitB are in metatable.
+    byte [] daughterRowKey = daughterHRI.getRegionName();
+    Get g = new Get(daughterRowKey);
+    g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
+    Result r = srvr.get(metaRegionName, g);
+    if (r == null || r.isEmpty()) {
+      // Daughter row not present.  Fixup kicks in.  Insert it.
+      LOG.warn("Fixup broken split: add missing split daughter to meta," +
+        " daughter=" + daughterHRI.toString() + ", parent=" + parent.toString());
+      Put p = new Put(daughterRowKey);
+      p.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
+        Writables.getBytes(daughterHRI));
+      srvr.put(metaRegionName, p);
+    }
+    return present;
+  }
+
+  /**
+   * Get daughter HRegionInfo out of parent info:splitA/info:splitB columns.
+   * @param result
+   * @param which Whether "info:splitA" or "info:splitB" column
+   * @return Deserialized content of the info:splitA or info:splitB as a
+   * HRegionInfo
+   * @throws IOException
+   */
+  private HRegionInfo getDaughterRegionInfo(final Result result,
+      final byte [] which)
+  throws IOException {
+    return Writables.getHRegionInfoOrNull(
+      result.getValue(HConstants.CATALOG_FAMILY, which));
+  }
+
+  /*
+   * Remove mention of daughter from parent row.
+   * @param metaRegionName
+   * @param srvr
+   * @param parent
+   * @param split
+   * @param qualifier
+   * @throws IOException
+   */
+  private void removeDaughterFromParent(final byte [] metaRegionName,
+      final HRegionInterface srvr, final HRegionInfo parent,
+      final HRegionInfo split, final byte [] qualifier)
+  throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(split.getRegionNameAsString() + "/" + split.getEncodedName() +
+        " no longer has references to " + parent.getRegionNameAsString());
+    }
+    Delete delete = new Delete(parent.getRegionName());
+    delete.deleteColumns(HConstants.CATALOG_FAMILY, qualifier);
+    srvr.delete(metaRegionName, delete);
+  }
+
+  /*
+   * Checks if a daughter region -- either splitA or splitB -- still holds
+   * references to parent.  If not, removes reference to the split from
+   * the parent meta region row so we don't check it any more.
+   * @param metaRegionName
+   * @param srvr Interface to the server hosting the meta region
+   * @param parent Parent region.
+   * @param rowContent Keyed content of the parent row in meta region.
+   * @param split HRegionInfo of the daughter (may be null).
+   * @param qualifier Which of the daughters to look at, splitA or splitB.
+   * @return True if still has references to parent.
+   * @throws IOException
+   */
+  private boolean hasReferences(final byte [] metaRegionName,
+      final HRegionInterface srvr, final HRegionInfo parent,
+      final Result rowContent, final HRegionInfo split,
+      final byte [] qualifier)
+  throws IOException {
+    boolean result = false;
+    if (split == null) return result;
+    FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
+    Path rootdir = this.services.getMasterFileSystem().getRootDir();
+    Path tabledir = new Path(rootdir, split.getTableDesc().getNameAsString());
+    for (HColumnDescriptor family: split.getTableDesc().getFamilies()) {
+      Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(),
+        family.getName());
+      // Look for reference files.  Call listStatus with an anonymous
+      // instance of PathFilter.
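+      // Reference files are written under the daughter's store directories at
+      // split time and point back at the parent's store files; they only go
+      // away once the daughter compacts, so their absence means the parent's
+      // files are no longer needed.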
+      FileStatus [] ps = fs.listStatus(p,
+        new PathFilter () {
+          public boolean accept(Path path) {
+            return StoreFile.isReference(path);
+          }
+        }
+      );
+
+      if (ps != null && ps.length > 0) {
+        result = true;
+        break;
+      }
+    }
+    if (!result) {
+      removeDaughterFromParent(metaRegionName, srvr, parent, split, qualifier);
+    }
+    return result;
+  }
+
+  /**
+   * Interrupt thread regardless of what it's doing
+   */
+  public void interruptAndStop() {
+    synchronized(scannerLock) {
+      if (isAlive()) {
+        super.interrupt();
+      }
+    }
+  }
+}
Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java	(revision 1000277)
+++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java	(working copy)
@@ -126,10 +126,36 @@
    */
   public static Map<HRegionInfo, HServerAddress> fullScan(CatalogTracker catalogTracker)
   throws IOException {
+    final Map<HRegionInfo, HServerAddress> regions =
+      new TreeMap<HRegionInfo, HServerAddress>();
+    Visitor v = new Visitor() {
+      @Override
+      public boolean visit(Result r) throws IOException {
+        if (r == null || r.isEmpty()) return true;
+        Pair<HRegionInfo, HServerAddress> region = metaRowToRegionPair(r);
+        regions.put(region.getFirst(), region.getSecond());
+        return true;
+      }
+    };
+    fullScan(catalogTracker, v);
+    return regions;
+  }
+
+  /**
+   * Performs a full scan of <code>.META.</code>.
+   * <p>
+   * Rather than returning a map, this form hands every row found by the scan
+   * to the passed <code>visitor</code>.  Scanning stops early if the visitor
+   * returns false.
+   * @param catalogTracker
+   * @param visitor Visitor invoked against each row of the catalog table.
+   * @throws IOException
+   */
+  public static void fullScan(CatalogTracker catalogTracker,
+      final Visitor visitor)
+  throws IOException {
     HRegionInterface metaServer =
       catalogTracker.waitForMetaServerConnectionDefault();
-    Map<HRegionInfo, HServerAddress> allRegions =
-      new TreeMap<HRegionInfo, HServerAddress>();
     Scan scan = new Scan();
     scan.addFamily(HConstants.CATALOG_FAMILY);
     long scannerid = metaServer.openScanner(
@@ -137,16 +163,12 @@
     try {
       Result data;
       while((data = metaServer.next(scannerid)) != null) {
-        if (!data.isEmpty()) {
-          Pair<HRegionInfo, HServerAddress> region =
-            metaRowToRegionPair(data);
-          allRegions.put(region.getFirst(), region.getSecond());
-        }
+        if (!data.isEmpty() && !visitor.visit(data)) break;
       }
     } finally {
       metaServer.close(scannerid);
     }
-    return allRegions;
+    return;
   }
 
   /**
@@ -423,4 +445,17 @@
       metaServer.close(scannerid);
     }
   }
-}
+
+  /**
+   * Implementations 'visit' a catalog table row.
+   */
+  public interface Visitor {
+    /**
+     * Visit the catalog table row.
+     * @param r A row from catalog table
+     * @return True if we are to proceed scanning the table, else false if
+     * we are to stop now.
+     */
+    public boolean visit(final Result r) throws IOException;
+  }
+}
\ No newline at end of file
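Two notes on the patch, each with an illustrative Java sketch that is not part of the diff.

First, the callback-style MetaReader.fullScan(CatalogTracker, Visitor) added above lets a caller walk .META. without materializing the whole region map, exactly as BaseScanner.scan() does. A minimal sketch, assuming the caller already holds a live CatalogTracker; the class and method names (MetaRowCounter, countMetaRows) are hypothetical:

  import java.io.IOException;
  import java.util.concurrent.atomic.AtomicInteger;

  import org.apache.hadoop.hbase.catalog.CatalogTracker;
  import org.apache.hadoop.hbase.catalog.MetaReader;
  import org.apache.hadoop.hbase.client.Result;

  public class MetaRowCounter {
    /** @return the number of non-empty rows currently in .META. */
    public static int countMetaRows(final CatalogTracker tracker)
    throws IOException {
      final AtomicInteger rows = new AtomicInteger(0);
      MetaReader.fullScan(tracker, new MetaReader.Visitor() {
        @Override
        public boolean visit(Result r) throws IOException {
          if (r != null && !r.isEmpty()) rows.incrementAndGet();
          return true;  // Keep scanning; returning false stops the scan early.
        }
      });
      return rows.get();
    }
  }

Second, BaseScanner calls addDaughterRowChecked and getDaughterRowChecked, but this hunk does not define them. A possible sketch of what they could look like inside BaseScanner, based only on the checkDaughter javadoc above ("The mark will be info:splitA_checked and its value will be a true BooleanWritable"); the "_checked" column-name suffix, the helper getNameOfVerifiedDaughterColumn, and the exact signatures are assumptions:

  // Column name for the "daughter row verified present" marker: the daughter
  // qualifier (splitA or splitB) plus a "_checked" suffix (assumed naming).
  private byte [] getNameOfVerifiedDaughterColumn(final byte [] daughterQualifier) {
    return Bytes.add(daughterQualifier, Bytes.toBytes("_checked"));
  }

  // Mark the parent row once its daughter row is known to be present in .META..
  private void addDaughterRowChecked(final byte [] metaRegionName,
      final HRegionInterface srvr, final byte [] parent,
      final HRegionInfo daughterHRI, final byte [] qualifier)
  throws IOException {
    Put p = new Put(parent);
    p.add(HConstants.CATALOG_FAMILY, getNameOfVerifiedDaughterColumn(qualifier),
      TRUE_WRITABLE_AS_BYTES);
    srvr.put(metaRegionName, p);
  }

  // True if the parent row already carries the marker written above.
  private boolean getDaughterRowChecked(final Result rowContent,
      final byte [] qualifier)
  throws IOException {
    byte [] b = rowContent.getValue(HConstants.CATALOG_FAMILY,
      getNameOfVerifiedDaughterColumn(qualifier));
    if (b == null || b.length == 0) return false;
    return ((BooleanWritable)Writables.getWritable(b, new BooleanWritable())).get();
  }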