Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java (revision 0) @@ -0,0 +1,290 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.UnknownRegionException; +import org.apache.hadoop.hbase.ZooKeeperConnectionException; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Like {@link TestSplitTransaction} in that we're testing {@link SplitTransaction} + * only the below tests are against a running cluster where {@link TestSplitTransaction} + * is tests against a bare {@link HRegion}. + */ +public class TestSplitTransactionOnCluster { + private static final Log LOG = + LogFactory.getLog(TestSplitTransactionOnCluster.class); + private HBaseAdmin admin = null; + private MiniHBaseCluster cluster = null; + + private static final HBaseTestingUtility TESTING_UTIL = + new HBaseTestingUtility(); + + @BeforeClass public static void before() throws Exception { + TESTING_UTIL.startMiniCluster(2); + } + + @AfterClass public static void after() throws Exception { + TESTING_UTIL.shutdownMiniCluster(); + } + + @Before public void setup() throws IOException { + TESTING_UTIL.ensureSomeRegionServersAvailable(2); + this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration()); + this.cluster = TESTING_UTIL.getMiniHBaseCluster(); + } + + /** + * Messy test that simulates case where SplitTransactions fails to add one + * of the daughters up into the .META. table before crash. We're testing + * fact that the shutdown handler will fixup the missing daughter region + * adding it back into .META. + * @throws IOException + * @throws InterruptedException + */ + @Test (timeout = 600000) public void testShutdownSimpleFixup() + throws IOException, InterruptedException { + final byte [] tableName = Bytes.toBytes("testShutdownSimpleFixup"); + + // Create table then get the single region for our new table. + HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY); + + List regions = cluster.getRegions(tableName); + assertEquals(1, regions.size()); + HRegionInfo hri = regions.get(0).getRegionInfo(); + + int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); + + // Turn off balancer so it doesn't cut in and mess up our placements. + this.admin.balanceSwitch(false); + // Turn off the meta scanner so it don't remove parent on us. + cluster.getMaster().setCatalogJanitorEnabled(false); + try { + // Add a bit of load up into the table so splittable. + TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); + // Get region pre-split. + HRegionServer server = cluster.getRegionServer(tableRegionIndex); + printOutRegions(server, "Initial regions: "); + int regionCount = server.getOnlineRegions().size(); + // Now split. + split(hri, server, regionCount); + // Get daughters + List daughters = cluster.getRegions(tableName); + assertTrue(daughters.size() >= 2); + // Remove one of the daughters from .META. to simulate failed insert of + // daughter region up into .META. + removeDaughterFromMeta(daughters.get(0).getRegionName()); + // Now crash the server + cluster.abortRegionServer(tableRegionIndex); + while(server.getOnlineRegions().size() > 0) { + LOG.info("Waiting on server to go down"); + Thread.sleep(100); + } + // Wait till regions are back on line again. + while(cluster.getRegions(tableName).size() < daughters.size()) { + LOG.info("Waiting for repair to happen"); + Thread.sleep(1000); + } + // Assert daughters are online. + regions = cluster.getRegions(tableName); + for (HRegion r: regions) { + assertTrue(daughters.contains(r)); + } + } finally { + admin.balanceSwitch(true); + cluster.getMaster().setCatalogJanitorEnabled(true); + } + } + + /** + * Test that if daughter split on us, we won't do the shutdown handler fixup + * just because we can't find the immediate daughter of an offlined parent. + * @throws IOException + * @throws InterruptedException + */ + @Test public void testShutdownFixupWhenDaughterHasSplit() + throws IOException, InterruptedException { + final byte [] tableName = + Bytes.toBytes("testShutdownFixupWhenDaughterHasSplit"); + + // Create table then get the single region for our new table. + HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY); + + List regions = cluster.getRegions(tableName); + assertEquals(1, regions.size()); + HRegionInfo hri = regions.get(0).getRegionInfo(); + + int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); + + // Turn off balancer so it doesn't cut in and mess up our placements. + this.admin.balanceSwitch(false); + // Turn off the meta scanner so it don't remove parent on us. + cluster.getMaster().setCatalogJanitorEnabled(false); + try { + // Add a bit of load up into the table so splittable. + TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); + // Get region pre-split. + HRegionServer server = cluster.getRegionServer(tableRegionIndex); + printOutRegions(server, "Initial regions: "); + int regionCount = server.getOnlineRegions().size(); + // Now split. + split(hri, server, regionCount); + // Get daughters + List daughters = cluster.getRegions(tableName); + assertTrue(daughters.size() >= 2); + // Now split one of the daughters. + regionCount = server.getOnlineRegions().size(); + split(daughters.get(0).getRegionInfo(), server, regionCount); + // Get list of daughters + daughters = cluster.getRegions(tableName); + // Now crash the server + cluster.abortRegionServer(tableRegionIndex); + while(server.getOnlineRegions().size() > 0) { + LOG.info("Waiting on server to go down"); + Thread.sleep(100); + } + // Wait till regions are back on line again. + while(cluster.getRegions(tableName).size() < daughters.size()) { + LOG.info("Waiting for repair to happen"); + Thread.sleep(1000); + } + // Assert daughters are online and ONLY the original daughters -- that + // fixup didn't insert one during server shutdown recover. + regions = cluster.getRegions(tableName); + assertEquals(daughters.size(), regions.size()); + for (HRegion r: regions) { + assertTrue(daughters.contains(r)); + } + } finally { + admin.balanceSwitch(true); + cluster.getMaster().setCatalogJanitorEnabled(true); + } + } + + private void split(final HRegionInfo hri, final HRegionServer server, + final int regionCount) + throws IOException, InterruptedException { + this.admin.split(hri.getRegionNameAsString()); + while(server.getOnlineRegions().size() <= regionCount) { + LOG.debug("Waiting on region to split"); + Thread.sleep(100); + } + } + + private void removeDaughterFromMeta(final byte [] regionName) throws IOException { + HTable metaTable = + new HTable(TESTING_UTIL.getConfiguration(), HConstants.META_TABLE_NAME); + Delete d = new Delete(regionName); + LOG.info("Deleted " + Bytes.toString(regionName)); + metaTable.delete(d); + } + + /** + * Ensure single table region is not on same server as the single .META. table + * region. + * @param admin + * @param hri + * @return Index of the server hosting the single table region + * @throws UnknownRegionException + * @throws MasterNotRunningException + * @throws ZooKeeperConnectionException + * @throws InterruptedException + */ + private int ensureTableRegionNotOnSameServerAsMeta(final HBaseAdmin admin, + final HRegionInfo hri) + throws UnknownRegionException, MasterNotRunningException, + ZooKeeperConnectionException, InterruptedException { + MiniHBaseCluster cluster = TESTING_UTIL.getMiniHBaseCluster(); + // Now make sure that the table region is not on same server as that hosting + // .META. We don't want .META. replay polluting our test when we later crash + // the table region serving server. + int metaServerIndex = cluster.getServerWithMeta(); + assertTrue(metaServerIndex != -1); + HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex); + int tableRegionIndex = cluster.getServerWith(hri.getRegionName()); + assertTrue(tableRegionIndex != -1); + HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex); + if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) { + HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer); + admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName())); + } + // Wait till table region is up on the server that is NOT carrying .META.. + while (true) { + tableRegionIndex = cluster.getServerWith(hri.getRegionName()); + if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break; + LOG.debug("Waiting on region move off the .META. server; current index " + + tableRegionIndex); + Thread.sleep(100); + } + // Verify for sure table region is not on same server as .META. + tableRegionIndex = cluster.getServerWith(hri.getRegionName()); + assertTrue(tableRegionIndex != -1); + assertNotSame(metaServerIndex, tableRegionIndex); + return tableRegionIndex; + } + + /** + * Find regionserver other than the one passed. + * Can't rely on indexes into list of regionservers since crashed servers + * occupy an index. + * @param cluster + * @param notThisOne + * @return A regionserver that is not notThisOne or null if none + * found + */ + private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster, + final HRegionServer notThisOne) { + for (RegionServerThread rst: cluster.getRegionServerThreads()) { + HRegionServer hrs = rst.getRegionServer(); + if (hrs.getServerName().equals(notThisOne.getServerName())) continue; + return hrs; + } + return null; + } + + private void printOutRegions(final HRegionServer hrs, final String prefix) { + List regions = hrs.getOnlineRegions(); + for (HRegionInfo region: regions) { + LOG.info(prefix + region.getRegionNameAsString()); + } + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (revision 1055985) +++ src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (working copy) @@ -51,6 +51,7 @@ private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName()); private final Server server; private final MasterServices services; + private boolean enabled = true; CatalogJanitor(final Server server, final MasterServices services) { super(server.getServerName() + "-CatalogJanitor", @@ -63,7 +64,7 @@ @Override protected boolean initialChore() { try { - scan(); + if (this.enabled) scan(); } catch (IOException e) { LOG.warn("Failed initial scan of catalog table", e); return false; @@ -71,6 +72,15 @@ return true; } + /** + * @param enabled + * @return New state of the enabled/disabled flag. + */ + public boolean setEnabled(final boolean enabled) { + this.enabled = enabled; + return this.enabled; + } + @Override protected void chore() { try { Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1055985) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -741,6 +741,17 @@ return oldValue; } + /** + * Switch for the background {@link CatalogJanitor} thread. + * Used testing. The thread will continue to run. It will just be a noop + * if disabled. + * @param b If false, the catalog janitor won't do anything. + * @return New state of the catalog janitor flag. + */ + public boolean setCatalogJanitorEnabled(final boolean b) { + return ((CatalogJanitor)this.catalogJanitorChore).setEnabled(b); + } + @Override public void move(final byte[] encodedRegionName, final byte[] destServerName) throws UnknownRegionException { Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1055985) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -28,7 +28,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.catalog.CatalogTracker; @@ -37,11 +36,11 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; import org.apache.hadoop.hbase.master.DeadServer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerManager; -import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; import org.apache.zookeeper.KeeperException; @@ -210,18 +209,95 @@ final AssignmentManager assignmentManager, final CatalogTracker catalogTracker) throws IOException { - byte [] bytes = result.getValue(HConstants.CATALOG_FAMILY, qualifier); - if (bytes == null || bytes.length <= 0) return; - HRegionInfo hri = Writables.getHRegionInfoOrNull(bytes); - if (hri == null) return; - Pair pair = - MetaReader.getRegion(catalogTracker, hri.getRegionName()); - if (pair == null || pair.getFirst() == null) { - LOG.info("Fixup; missing daughter " + hri.getEncodedName()); - MetaEditor.addDaughter(catalogTracker, hri, null); - assignmentManager.assign(hri, true); + HRegionInfo daughter = getHRegionInfo(result, qualifier); + if (daughter == null) return; + if (isDaughterMissing(catalogTracker, daughter)) { + LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString()); + MetaEditor.addDaughter(catalogTracker, daughter, null); + // And assign it. + assignmentManager.assign(daughter, true); } else { - LOG.debug("Daughter " + hri.getRegionNameAsString() + " present"); + LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present"); } } -} + + /** + * Interpret the content of the cell at {@link HConstants#CATALOG_FAMILY} and + * qualifier as an HRegionInfo and return it, or null. + * @param r Result instance to pull from. + * @param qualifier Column family qualifier + * @return An HRegionInfo instance or null. + * @throws IOException + */ + private static HRegionInfo getHRegionInfo(final Result r, byte [] qualifier) + throws IOException { + byte [] bytes = r.getValue(HConstants.CATALOG_FAMILY, qualifier); + if (bytes == null || bytes.length <= 0) return null; + return Writables.getHRegionInfoOrNull(bytes); + } + + /** + * Look for presence of the daughter OR of a split of the daughter. Daughter + * could have been split over on regionserver before a run of the + * catalogJanitor had chance to clear reference from parent. + * @param daughter Daughter region to search for. + * @throws IOException + */ + private static boolean isDaughterMissing(final CatalogTracker catalogTracker, + final HRegionInfo daughter) throws IOException { + IsDaughterVisitor visitor = new IsDaughterVisitor(daughter); + // Start the scan at the what should be the daughter's row in the .META. + // We will either 1., find the daughter or some derivative split of the + // daughter (will have same table name and start row at least but will sort + // after because has larger regionid -- the regionid is timestamp of region + // creation), OR, we will not find anything with same table name and start + // row. If the latter, then assume daughter missing and do fixup. + byte [] startrow = daughter.getRegionName(); + MetaReader.fullScan(catalogTracker, visitor, startrow); + return !visitor.isDaughter(); + } + + /** + * Looks for daughter. Sets a flag if daughter or some progeny of daughter + * is found up in .META.. + */ + static class IsDaughterVisitor implements MetaReader.Visitor { + private final HRegionInfo daughter; + private boolean found = false; + + IsDaughterVisitor(final HRegionInfo daughter) { + this.daughter = daughter; + } + + /** + * @return True if we found a daughter region during our visiting. + */ + boolean isDaughter() { + return this.found; + } + + @Override + public boolean visit(Result r) throws IOException { + HRegionInfo hri = getHRegionInfo(r, HConstants.REGIONINFO_QUALIFIER); + if (hri == null) { + LOG.warn("No serialized HRegionInfo in " + r); + return true; + } + // Now see if we have gone beyond the daughter's startrow. + if (!Bytes.equals(daughter.getTableDesc().getName(), + hri.getTableDesc().getName())) { + // We fell into another table. Stop scanning. + return false; + } + // If our start rows do not compare, move on. + if (!Bytes.equals(daughter.getStartKey(), hri.getStartKey())) { + return false; + } + // Else, table name and start rows compare. It means that the daughter + // or some derivative split of the daughter is up in .META. Daughter + // exists. + this.found = true; + return false; + } + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (revision 1055985) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (working copy) @@ -234,9 +234,28 @@ public static void fullScan(CatalogTracker catalogTracker, final Visitor visitor) throws IOException { + fullScan(catalogTracker, visitor, null); + } + + /** + * Performs a full scan of .META.. + *

+ * Returns a map of every region to it's currently assigned server, according + * to META. If the region does not have an assignment it will have a null + * value in the map. + * @param catalogTracker + * @param visitor + * @param startrow Where to start the scan. Pass null if want to begin scan + * at first row. + * @throws IOException + */ + public static void fullScan(CatalogTracker catalogTracker, + final Visitor visitor, final byte [] startrow) + throws IOException { HRegionInterface metaServer = catalogTracker.waitForMetaServerConnectionDefault(); Scan scan = new Scan(); + if (startrow != null) scan.setStartRow(startrow); scan.addFamily(HConstants.CATALOG_FAMILY); long scannerid = metaServer.openScanner( HRegionInfo.FIRST_META_REGIONINFO.getRegionName(), scan); Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (revision 1055985) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (working copy) @@ -77,10 +77,6 @@ copyOfParent.setSplit(true); Put put = new Put(copyOfParent.getRegionName()); addRegionInfo(put, copyOfParent); - put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, - HConstants.EMPTY_BYTE_ARRAY); - put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, - HConstants.EMPTY_BYTE_ARRAY); put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER, Writables.getBytes(a)); put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER,