diff --git src/docbkx/ops_mgt.xml src/docbkx/ops_mgt.xml index d3035a4..ac00c5e 100644 --- src/docbkx/ops_mgt.xml +++ src/docbkx/ops_mgt.xml @@ -93,9 +93,9 @@ endtime End of the time range. Without endtime means starttime to forever. versions Number of cell versions to copy. new.name New table's name. - peer.adr Address of the peer cluster given in the format hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent + peer.adr Address of the peer cluster given in the format hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent families Comma-separated list of ColumnFamilies to copy. - all.cells Also copy delete markers and uncollected deleted cells (advanced option). + all.cells Also copy delete markers and uncollected deleted cells (advanced option). If used the 'families' argument must be omitted. Args: diff --git src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java index 798f227..f692648 100644 --- src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java +++ src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java @@ -19,17 +19,23 @@ */ package org.apache.hadoop.hbase.mapreduce; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.GenericOptionsParser; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; +// TODO: clean up CopyTable class. Member variables that don't need to be public should be made +// private. 
More importantly, keeping configuration state as static member variables is confusing +// and leads to a situation where a user can unknowingly bring conflicting configuration settings from one +// run on to the next, even when using a new CopyTable instance, potentially leading to subtle and +// confusing bugs. /** * Tool used to copy a table to another one which can be on a different setup. @@ -125,11 +131,13 @@ public class CopyTable { System.err.println(" versions number of cell versions to copy"); System.err.println(" new.name new table's name"); System.err.println(" peer.adr Address of the peer cluster given in the format"); - System.err.println(" hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent"); + System.err + .println(" hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent"); System.err.println(" families comma-separated list of families to copy"); System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. "); System.err.println(" To keep the same name, just give \"cfName\""); - System.err.println(" all.cells also copy delete markers and deleted cells"); + System.err + .println(" all.cells also copy delete markers and deleted cells. 
If used the 'families' argument must be omitted."); System.err.println(); System.err.println("Args:"); System.err.println(" tablename Name of the table to copy"); @@ -219,6 +227,10 @@ public class CopyTable { "peer address must be specified"); return false; } + if (allCells == true && families != null) { + printUsage("You cannot use the all.cells flag when also specifying families"); + return false; + } } catch (Exception e) { e.printStackTrace(); printUsage("Can't start because " + e.getMessage()); diff --git src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java new file mode 100644 index 0000000..77c9f1a --- /dev/null +++ src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.mapreduce; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.ZooKeeperConnectionException; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.GenericOptionsParser; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(MediumTests.class) +public class TestCopyTable { + + private static Configuration conf1; + private static Configuration conf2; + private static HBaseTestingUtility utility1; + private static HBaseTestingUtility utility2; + private static MiniZooKeeperCluster miniZK; + private static final byte[] ROW1 = Bytes.toBytes("row1"); + private static final byte[] ROW2 = Bytes.toBytes("row2"); + private static final String FAMILYA_STRING = "a"; + private static final byte[] FAMILYA = 
Bytes.toBytes(FAMILYA_STRING); + private static final byte[] QUAL = Bytes.toBytes("q"); + private static final long now = System.currentTimeMillis(); + private static final String SOURCE_TABLE_NAME = "SourceTable"; + private static final String DESTINATION_TABLE_NAME = "DestinationTable"; + + @Before + public void testSetup() throws Exception { + utility1.startMiniCluster(); + utility2.startMiniCluster(); + } + + @BeforeClass + public static void setUp() throws Exception { + conf1 = HBaseConfiguration.create(); + conf1.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1"); + utility1 = new HBaseTestingUtility(conf1); + + utility1.startMiniZKCluster(); + miniZK = utility1.getZkCluster(); + // By setting the mini ZK cluster through this method, even though this is + // already utility1's mini ZK cluster, we are telling utility1 not to shut + // the mini ZK cluster when we shut down the HBase cluster. + utility1.setZkCluster(miniZK); + new ZooKeeperWatcher(conf1, "cluster1", null, true); + + conf2 = new Configuration(conf1); + conf2.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/2"); + + utility2 = new HBaseTestingUtility(conf2); + utility2.setZkCluster(miniZK); + new ZooKeeperWatcher(conf2, "cluster2", null, true); + utility1.startMiniMapReduceCluster(); + utility2.startMiniMapReduceCluster(); + } + + @After + public void testTeardown() throws Exception { + utility1.shutdownMiniCluster(); + utility2.shutdownMiniCluster(); + } + + @AfterClass + public static void tearDown() throws Exception { + utility1.shutdownMiniMapReduceCluster(); + utility2.shutdownMiniMapReduceCluster(); + } + + /** + * Reset the static state of CopyTable. Once CopyTable is refactored (see TODO + * in CopyTable.java) to not retain so much static state this helper method + * should be eliminated. 
+ * + * @throws Exception + */ + @Before + @After + public void resetStaticStateOfCopyTable() { + CopyTable.rsClass = null; + CopyTable.rsImpl = null; + CopyTable.startTime = 0; + CopyTable.endTime = 0; + CopyTable.versions = -1; + CopyTable.tableName = null; + CopyTable.newTableName = null; + CopyTable.peerAddress = null; + CopyTable.families = null; + CopyTable.allCells = false; + } + + /** + * Create a table for use in testing. + */ + public HTable createTestTable(String tableName, Configuration conf) + throws MasterNotRunningException, ZooKeeperConnectionException, + IOException { + + HTableDescriptor table = new HTableDescriptor(tableName); + HColumnDescriptor fam = new HColumnDescriptor(FAMILYA); + table.addFamily(fam); + new HBaseAdmin(conf).createTable(table); + return new HTable(conf, tableName); + } + + /** + * Tests the CopyTable utility by copying data between tables in two separate + * test clusters. + * + * @throws Exception + */ + @Test + public void testSimpleCaseWithReplication() throws Exception { + HTable sourceTable = createTestTable(SOURCE_TABLE_NAME, conf1); + HTable destinationTable = createTestTable(DESTINATION_TABLE_NAME, conf2); + + Put p = new Put(ROW1); + p.add(FAMILYA, QUAL, now, QUAL); + p.add(FAMILYA, QUAL, now + 1, QUAL); + p.add(FAMILYA, QUAL, now + 2, QUAL); + sourceTable.put(p); + p = new Put(ROW2); + p.add(FAMILYA, QUAL, now, QUAL); + p.add(FAMILYA, QUAL, now + 1, QUAL); + p.add(FAMILYA, QUAL, now + 2, QUAL); + sourceTable.put(p); + + String[] args = new String[] { + "--peer.adr=" + conf2.get("hbase.zookeeper.quorum") + ":" + + conf2.get("hbase.zookeeper.property.clientPort") + ":" + + conf2.get("zookeeper.znode.parent"), + "--new.name=" + DESTINATION_TABLE_NAME, "--versions=3", + "--families=" + FAMILYA_STRING, SOURCE_TABLE_NAME }; + + GenericOptionsParser opts = new GenericOptionsParser(new Configuration( + conf1), args); + Configuration conf = opts.getConfiguration(); + args = opts.getRemainingArgs(); + + Job job = 
CopyTable.createSubmittableJob(conf, args); + + job.waitForCompletion(false); + assertTrue(job.isSuccessful()); + + Get g = new Get(ROW1); + g.setMaxVersions(); + Result r = destinationTable.get(g); + assertEquals(3, r.size()); + assertTrue(java.util.Arrays.equals(QUAL, r.getValue(FAMILYA, QUAL))); + g = new Get(ROW2); + g.setMaxVersions(); + r = destinationTable.get(g); + assertEquals(3, r.size()); + assertTrue(java.util.Arrays.equals(QUAL, r.getValue(FAMILYA, QUAL))); + } + + /** + * Tests CopyTable when delete markers are also copied between tables. + * + * @throws Exception + */ + @Test + public void testWithDeletes() throws Exception { + HTable sourceTable = createTestTable(SOURCE_TABLE_NAME, conf1); + HTable destinationTable = createTestTable(DESTINATION_TABLE_NAME, conf2); + + Put p = new Put(ROW1); + p.add(FAMILYA, QUAL, now, QUAL); + p.add(FAMILYA, QUAL, now + 1, QUAL); + p.add(FAMILYA, QUAL, now + 2, QUAL); + p.add(FAMILYA, QUAL, now + 3, QUAL); + p.add(FAMILYA, QUAL, now + 4, QUAL); + sourceTable.put(p); + + Delete d = new Delete(ROW1, now + 3, null); + sourceTable.delete(d); + d = new Delete(ROW1); + d.deleteColumns(FAMILYA, QUAL, now + 2); + sourceTable.delete(d); + + String[] args = new String[] { + "--peer.adr=" + conf2.get("hbase.zookeeper.quorum") + ":" + + conf2.get("hbase.zookeeper.property.clientPort") + ":" + + conf2.get("zookeeper.znode.parent"), + "--new.name=" + DESTINATION_TABLE_NAME, "--versions=5", + "--all.cells=true", SOURCE_TABLE_NAME }; + + GenericOptionsParser opts = new GenericOptionsParser(new Configuration( + conf1), args); + Configuration conf = opts.getConfiguration(); + args = opts.getRemainingArgs(); + + Job job = CopyTable.createSubmittableJob(conf, args); + job.waitForCompletion(false); + assertTrue(job.isSuccessful()); + + Scan s = new Scan(); + s.setMaxVersions(); + s.setRaw(true); + ResultScanner scanner = destinationTable.getScanner(s); + Result r = scanner.next(); + KeyValue[] res = r.raw(); + 
assertTrue(res[0].isDeleteFamily()); + assertEquals(now + 4, res[1].getTimestamp()); + assertEquals(now + 3, res[2].getTimestamp()); + assertTrue(res[3].isDelete()); + assertEquals(now + 2, res[4].getTimestamp()); + } + +} \ No newline at end of file