Index: src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 1006040) +++ src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -214,6 +214,11 @@ System.setProperty("test.cache.data", this.clusterTestBuildDir.toString()); this.dfsCluster = new MiniDFSCluster(0, this.conf, servers, true, true, true, null, null, null, null); + // Set this just-started cluster as our filesystem. + FileSystem fs = this.dfsCluster.getFileSystem(); + this.conf.set("fs.defaultFS", fs.getUri().toString()); + // Do old style too just to be safe. + this.conf.set("fs.default.name", fs.getUri().toString()); return this.dfsCluster; } @@ -318,7 +323,7 @@ // If we already put up a cluster, fail. String testBuildPath = conf.get(TEST_DIRECTORY_KEY, null); isRunningCluster(testBuildPath); - if(testBuildPath != null) { + if (testBuildPath != null) { LOG.info("Using passed path: " + testBuildPath); } // Make a new random dir to home everything in. Set it as system property. @@ -329,24 +334,30 @@ // Bring up mini dfs cluster. This spews a bunch of warnings about missing // scheme. Complaints are 'Scheme is undefined for build/test/data/dfs/name1'. startMiniDFSCluster(numSlaves, this.clusterTestBuildDir); - - // Mangle conf so fs parameter points to minidfs we just started up - FileSystem fs = this.dfsCluster.getFileSystem(); - this.conf.set("fs.defaultFS", fs.getUri().toString()); - // Do old style too just to be safe. - this.conf.set("fs.default.name", fs.getUri().toString()); this.dfsCluster.waitClusterUp(); // Start up a zk cluster. if (this.zkCluster == null) { startMiniZKCluster(this.clusterTestBuildDir); } + return startMiniHBaseCluster(numMasters, numSlaves); + } + /** + * Starts up mini hbase cluster. Usually used after the + * mini dfs and zk clusters are up when doing stepped startup of clusters.
+ * Usually you won't want this. You'll usually want {@link #startMiniCluster()}. + * @param numMasters + * @param numSlaves + * @return Reference to the hbase mini hbase cluster. + * @throws IOException + * @see {@link #startMiniCluster()} + */ + public MiniHBaseCluster startMiniHBaseCluster(final int numMasters, + final int numSlaves) + throws IOException { // Now do the mini hbase cluster. Set the hbase.rootdir in config. - Path hbaseRootdir = fs.makeQualified(fs.getHomeDirectory()); - this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString()); - fs.mkdirs(hbaseRootdir); - FSUtils.setVersion(fs, hbaseRootdir); + createRootDir(); Configuration c = new Configuration(this.conf); this.hbaseCluster = new MiniHBaseCluster(c, numMasters, numSlaves); // Don't leave here till we've done a successful scan of the .META. @@ -386,6 +397,7 @@ } /** + * Stops mini hbase, zk, and hdfs clusters. * @throws IOException * @see {@link #startMiniCluster(int)} */ @@ -414,6 +426,23 @@ } /** + * Creates an hbase rootdir in user home directory. Also creates hbase + * version file. Normally you won't make use of this method. Root hbasedir + * is created for you as part of mini cluster startup. You'd only use this + * method if you were doing manual operation. 
+ * @return Fully qualified path to hbase root dir + * @throws IOException + */ + public Path createRootDir() throws IOException { + FileSystem fs = FileSystem.get(this.conf); + Path hbaseRootdir = fs.makeQualified(fs.getHomeDirectory()); + this.conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString()); + fs.mkdirs(hbaseRootdir); + FSUtils.setVersion(fs, hbaseRootdir); + return hbaseRootdir; + } + + /** * Flushes all caches in the mini hbase cluster * @throws IOException */ Index: src/test/java/org/apache/hadoop/hbase/AbstractMergeTestBase.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/AbstractMergeTestBase.java (revision 1006040) +++ src/test/java/org/apache/hadoop/hbase/AbstractMergeTestBase.java (working copy) @@ -1,143 +0,0 @@ -/** - * Copyright 2007 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.Random; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.util.Bytes; - -/** Abstract base class for merge tests */ -public abstract class AbstractMergeTestBase extends HBaseClusterTestCase { - static final Log LOG = - LogFactory.getLog(AbstractMergeTestBase.class.getName()); - static final byte [] COLUMN_NAME = Bytes.toBytes("contents"); - protected final Random rand = new Random(); - protected HTableDescriptor desc; - protected ImmutableBytesWritable value; - protected boolean startMiniHBase; - - public AbstractMergeTestBase() { - this(true); - } - - /** constructor - * @param startMiniHBase - */ - public AbstractMergeTestBase(boolean startMiniHBase) { - super(); - - this.startMiniHBase = startMiniHBase; - - // We will use the same value for the rows as that is not really important here - - String partialValue = String.valueOf(System.currentTimeMillis()); - StringBuilder val = new StringBuilder(); - while(val.length() < 1024) { - val.append(partialValue); - } - - try { - value = new ImmutableBytesWritable( - val.toString().getBytes(HConstants.UTF8_ENCODING)); - } catch (UnsupportedEncodingException e) { - fail(); - } - desc = new HTableDescriptor(Bytes.toBytes("test")); - desc.addFamily(new HColumnDescriptor(COLUMN_NAME)); - } - - @Override - protected void hBaseClusterSetup() throws Exception { - if (startMiniHBase) { - super.hBaseClusterSetup(); - } - } - - @Override - public void preHBaseClusterSetup() throws Exception { - conf.setLong("hbase.hregion.max.filesize", 64L * 1024L * 1024L); - - // We create three data regions: The first is too large to merge since it - // will be > 64 MB in size. 
The second two will be smaller and will be - // selected for merging. - - // To ensure that the first region is larger than 64MB we need to write at - // least 65536 rows. We will make certain by writing 70000 - - byte [] row_70001 = Bytes.toBytes("row_70001"); - byte [] row_80001 = Bytes.toBytes("row_80001"); - - // XXX: Note that the number of rows we put in is different for each region - // because currently we don't have a good mechanism to handle merging two - // store files with the same sequence id. We can't just dumbly stick them - // in because it will screw up the order when the store files are loaded up. - // The sequence ids are used for arranging the store files, so if two files - // have the same id, one will overwrite the other one in our listing, which - // is very bad. See HBASE-1212 and HBASE-1274. - HRegion[] regions = { - createAregion(null, row_70001, 1, 70000), - createAregion(row_70001, row_80001, 70001, 10000), - createAregion(row_80001, null, 80001, 11000) - }; - - // Now create the root and meta regions and insert the data regions - // created above into the meta - - createRootAndMetaRegions(); - - for(int i = 0; i < regions.length; i++) { - HRegion.addRegionToMETA(meta, regions[i]); - } - - closeRootAndMeta(); - } - - private HRegion createAregion(byte [] startKey, byte [] endKey, int firstRow, - int nrows) throws IOException { - - HRegion region = createNewHRegion(desc, startKey, endKey); - - System.out.println("created region " + - Bytes.toString(region.getRegionName())); - - HRegionIncommon r = new HRegionIncommon(region); - for(int i = firstRow; i < firstRow + nrows; i++) { - Put put = new Put(Bytes.toBytes("row_" - + String.format("%1$05d", i))); - put.add(COLUMN_NAME, null, value.get()); - region.put(put); - if(i % 10000 == 0) { - System.out.println("Flushing write #" + i); - r.flushcache(); - } - } - region.close(); - region.getLog().closeAndDelete(); - region.getRegionInfo().setOffline(true); - return region; - } -} Index: 
src/test/java/org/apache/hadoop/hbase/util/TestMergeMeta.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/TestMergeMeta.java (revision 1006040) +++ src/test/java/org/apache/hadoop/hbase/util/TestMergeMeta.java (working copy) @@ -1,48 +0,0 @@ -/** - * Copyright 2007 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase.util; - -import java.io.IOException; - -import org.apache.hadoop.hbase.AbstractMergeTestBase; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.util.HMerge; - -/** Tests region merging */ -public class TestMergeMeta extends AbstractMergeTestBase { - - /** constructor - * @throws Exception - */ - public TestMergeMeta() throws Exception { - super(false); - conf.setLong("hbase.client.pause", 1 * 1000); - conf.setInt("hbase.client.retries.number", 2); - } - - /** - * test case - * @throws IOException - */ - public void testMergeMeta() throws IOException { - assertNotNull(dfsCluster); - HMerge.merge(conf, dfsCluster.getFileSystem(), HConstants.META_TABLE_NAME, false); - } -} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/util/TestMergeTable.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/TestMergeTable.java (revision 1006065) +++ src/test/java/org/apache/hadoop/hbase/util/TestMergeTable.java (working copy) @@ -19,27 +19,145 @@ */ package org.apache.hadoop.hbase.util; +import static org.junit.Assert.assertTrue; + import java.io.IOException; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.AbstractMergeTestBase; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.util.HMerge; +import org.apache.hadoop.hbase.client.HConnection; +import 
org.apache.hadoop.hbase.client.HConnectionManager; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.junit.Test; /** * Tests merging a normal table's regions */ -public class TestMergeTable extends AbstractMergeTestBase { +public class TestMergeTable { + private static final Log LOG = LogFactory.getLog(TestMergeTable.class); + private final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final byte [] COLUMN_NAME = Bytes.toBytes("contents"); + private static final byte [] VALUE; + static { + // We will use the same value for the rows as that is not really important here + String partialValue = String.valueOf(System.currentTimeMillis()); + StringBuilder val = new StringBuilder(); + while (val.length() < 1024) { + val.append(partialValue); + } + VALUE = Bytes.toBytes(val.toString()); + } /** - * Test case - * @throws IOException + * Test merge. + * Hand-makes regions of a mergeable size and adds the hand-made regions to + * hand-made meta. The hand-made regions are created offline. We then start + * up mini cluster, disable the hand-made table and start in on merging. + * @throws Exception */ - public void testMergeTable() throws IOException { - assertNotNull(dfsCluster); - Configuration c = new Configuration(this.conf); - HBaseAdmin admin = new HBaseAdmin(c); - admin.disableTable(desc.getName()); - HMerge.merge(c, dfsCluster.getFileSystem(), desc.getName()); + @Test public void testMergeTable() throws Exception { + // Table we are manually creating offline. + HTableDescriptor desc = new HTableDescriptor(Bytes.toBytes("test")); + desc.addFamily(new HColumnDescriptor(COLUMN_NAME)); + + // Set maximum regionsize down. + UTIL.getConfiguration().setLong("hbase.hregion.max.filesize", 64L * 1024L * 1024L); + // Startup hdfs. It's in here we'll be putting our manually made regions. + UTIL.startMiniDFSCluster(1); + // Create hdfs hbase rootdir.
+ Path rootdir = UTIL.createRootDir(); + + // Now create three data regions: The first is too large to merge since it + // will be > 64 MB in size. The second two will be smaller and will be + // selected for merging. + + // To ensure that the first region is larger than 64MB we need to write at + // least 65536 rows. We will make certain by writing 70000 + byte [] row_70001 = Bytes.toBytes("row_70001"); + byte [] row_80001 = Bytes.toBytes("row_80001"); + + // Create regions and populate them at same time. + HRegion [] regions = { + createRegion(desc, null, row_70001, 1, 70000, rootdir), + createRegion(desc, row_70001, row_80001, 70001, 10000, rootdir), + createRegion(desc, row_80001, null, 80001, 11000, rootdir) + }; + + // Now create the root and meta regions and insert the data regions + // created above into .META. + setupROOTAndMeta(rootdir, regions); + try { + LOG.info("Starting mini zk cluster"); + UTIL.startMiniZKCluster(); + LOG.info("Starting mini hbase cluster"); + UTIL.startMiniHBaseCluster(1, 1); + Configuration c = new Configuration(UTIL.getConfiguration()); + HConnection connection = HConnectionManager.getConnection(c); + CatalogTracker ct = new CatalogTracker(connection); + ct.start(); + List originalTableRegions = + MetaReader.getTableRegions(ct, desc.getName()); + LOG.info("originalTableRegions size=" + originalTableRegions.size() + + "; " + originalTableRegions); + HBaseAdmin admin = new HBaseAdmin(new Configuration(c)); + admin.disableTable(desc.getName()); + HMerge.merge(c, FileSystem.get(c), desc.getName()); + List postMergeTableRegions = + MetaReader.getTableRegions(ct, desc.getName()); + LOG.info("postMergeTableRegions size=" + postMergeTableRegions.size() + + "; " + postMergeTableRegions); + assertTrue(postMergeTableRegions.size() < originalTableRegions.size()); + } finally { + UTIL.shutdownMiniCluster(); + } } + + private HRegion createRegion(final HTableDescriptor desc, + byte [] startKey, byte [] endKey, int firstRow, int nrows, Path 
rootdir) + throws IOException { + HRegionInfo hri = new HRegionInfo(desc, startKey, endKey); + HRegion region = HRegion.createHRegion(hri, rootdir, UTIL.getConfiguration()); + LOG.info("Created region " + region.getRegionNameAsString()); + for(int i = firstRow; i < firstRow + nrows; i++) { + Put put = new Put(Bytes.toBytes("row_" + String.format("%1$05d", i))); + put.add(COLUMN_NAME, null, VALUE); + region.put(put); + if (i % 10000 == 0) { + LOG.info("Flushing write #" + i); + region.flushcache(); + } + } + region.close(); + region.getLog().closeAndDelete(); + return region; + } + + protected void setupROOTAndMeta(Path rootdir, final HRegion [] regions) + throws IOException { + HRegion root = + HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, rootdir, UTIL.getConfiguration()); + HRegion meta = + HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, rootdir, + UTIL.getConfiguration()); + HRegion.addRegionToMETA(root, meta); + for (HRegion r: regions) { + HRegion.addRegionToMETA(meta, r); + } + meta.close(); + meta.getLog().closeAndDelete(); + root.close(); + root.getLog().closeAndDelete(); + } } \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1006107) +++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (working copy) @@ -71,6 +71,18 @@ HRegionInfo.FIRST_META_REGIONINFO.getRegionName(); /** + * Constructs a catalog tracker. Find current state of catalog tables and + * begin active tracking by executing {@link #start()} post construction. + * Does not timeout. + * @param connection Server connection; if problem, this connection's + * {@link HConnection#abort(String, Throwable)} will be called.
+ * @throws IOException + */ + public CatalogTracker(final HConnection connection) throws IOException { + this(connection.getZooKeeperWatcher(), connection, connection); + } + + /** * Constructs the catalog tracker. Find current state of catalog tables and * begin active tracking by executing {@link #start()} post construction. * Does not timeout. @@ -274,7 +286,7 @@ * for up to the specified timeout if not immediately available. Throws an * exception if timed out waiting. This method differs from {@link #waitForMeta()} * in that it will go ahead and verify the location gotten from ZooKeeper by - * trying trying to use returned connection. + * trying to use returned connection. * @param timeout maximum time to wait for meta availability, in milliseconds * @return location of meta * @throws InterruptedException if interrupted while waiting Index: src/main/java/org/apache/hadoop/hbase/util/HMerge.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/HMerge.java (revision 1006040) +++ src/main/java/org/apache/hadoop/hbase/util/HMerge.java (working copy) @@ -154,7 +154,7 @@ void process() throws IOException { try { - for(HRegionInfo[] regionsToMerge = next(); + for (HRegionInfo[] regionsToMerge = next(); regionsToMerge != null; regionsToMerge = next()) { if (!merge(regionsToMerge)) { @@ -172,7 +172,7 @@ } protected boolean merge(final HRegionInfo[] info) throws IOException { - if(info.length < 2) { + if (info.length < 2) { LOG.info("only one region - nothing to merge"); return false; } @@ -196,8 +196,8 @@ if ((currentSize + nextSize) <= (maxFilesize / 2)) { // We merge two adjacent regions if their total size is less than // one half of the desired maximum size - LOG.info("merging regions " + Bytes.toString(currentRegion.getRegionName()) - + " and " + Bytes.toString(nextRegion.getRegionName())); + LOG.info("Merging regions " + currentRegion.getRegionNameAsString() + + " and " + 
nextRegion.getRegionNameAsString()); HRegion mergedRegion = HRegion.mergeAdjacent(currentRegion, nextRegion); updateMeta(currentRegion.getRegionName(), nextRegion.getRegionName(), Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 1006040) +++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -40,7 +40,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; @@ -181,7 +180,7 @@ } /* Encapsulates connection to zookeeper and regionservers.*/ - static class HConnectionImplementation implements HConnection, Abortable { + static class HConnectionImplementation implements HConnection { static final Log LOG = LogFactory.getLog(HConnectionImplementation.class); private final Class serverInterfaceClass; private final long pause; Index: src/main/java/org/apache/hadoop/hbase/client/HConnection.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HConnection.java (revision 1006040) +++ src/main/java/org/apache/hadoop/hbase/client/HConnection.java (working copy) @@ -25,6 +25,7 @@ import java.util.Map; import java.util.concurrent.ExecutorService; +import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HServerAddress; @@ -39,7 +40,7 @@ * Cluster connection. * {@link HConnectionManager} manages instances of this class. 
*/ -public interface HConnection { +public interface HConnection extends Abortable { /** * Retrieve ZooKeeperWatcher used by the connection. * @return ZooKeeperWatcher handle being used by the connection.