diff --git src/docbkx/ops_mgt.xml src/docbkx/ops_mgt.xml
index d3035a4..ac00c5e 100644
--- src/docbkx/ops_mgt.xml
+++ src/docbkx/ops_mgt.xml
@@ -93,9 +93,9 @@
endtime End of the time range. Without endtime means starttime to forever.
versions Number of cell versions to copy.
new.name New table's name.
- peer.adr Address of the peer cluster given in the format hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent
+  peer.adr         Address of the peer cluster given in the format hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent
families Comma-separated list of ColumnFamilies to copy.
- all.cells Also copy delete markers and uncollected deleted cells (advanced option).
+  all.cells        Also copy delete markers and uncollected deleted cells (advanced option). If used, the 'families' argument must be omitted.
Args:
diff --git src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
index 798f227..f692648 100644
--- src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -19,17 +19,23 @@
*/
package org.apache.hadoop.hbase.mapreduce;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
+// TODO: clean up CopyTable class. Member variables that don't need to be public should be made
+// private. More importantly, keeping configuration state as static member variables is confusing
+// and leads to a situation where a user can unknowingly bring conflicting configuration settings from one
+// run on to the next, even when using a new CopyTable instance, potentially leading to subtle and
+// confusing bugs.
/**
* Tool used to copy a table to another one which can be on a different setup.
@@ -125,11 +131,13 @@ public class CopyTable {
System.err.println(" versions number of cell versions to copy");
System.err.println(" new.name new table's name");
System.err.println(" peer.adr Address of the peer cluster given in the format");
- System.err.println(" hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
+ System.err
+ .println(" hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent");
System.err.println(" families comma-separated list of families to copy");
System.err.println(" To copy from cf1 to cf2, give sourceCfName:destCfName. ");
System.err.println(" To keep the same name, just give \"cfName\"");
- System.err.println(" all.cells also copy delete markers and deleted cells");
+ System.err
+ .println(" all.cells also copy delete markers and deleted cells. If used the 'families' argument must be omitted.");
System.err.println();
System.err.println("Args:");
System.err.println(" tablename Name of the table to copy");
@@ -219,6 +227,10 @@ public class CopyTable {
"peer address must be specified");
return false;
}
+ if (allCells == true && families != null) {
+ printUsage("You cannot use the all.cells flag when also specifying families");
+ return false;
+ }
} catch (Exception e) {
e.printStackTrace();
printUsage("Can't start because " + e.getMessage());
diff --git src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
new file mode 100644
index 0000000..77c9f1a
--- /dev/null
+++ src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
@@ -0,0 +1,254 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapreduce;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.ZooKeeperConnectionException;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestCopyTable {
+
+ private static Configuration conf1;
+ private static Configuration conf2;
+ private static HBaseTestingUtility utility1;
+ private static HBaseTestingUtility utility2;
+ private static MiniZooKeeperCluster miniZK;
+ private static final byte[] ROW1 = Bytes.toBytes("row1");
+ private static final byte[] ROW2 = Bytes.toBytes("row2");
+ private static final String FAMILYA_STRING = "a";
+ private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
+ private static final byte[] QUAL = Bytes.toBytes("q");
+ private static final long now = System.currentTimeMillis();
+ private static final String SOURCE_TABLE_NAME = "SourceTable";
+ private static final String DESTINATION_TABLE_NAME = "DestinationTable";
+
+ @Before
+ public void testSetup() throws Exception {
+ utility1.startMiniCluster();
+ utility2.startMiniCluster();
+ }
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ conf1 = HBaseConfiguration.create();
+ conf1.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
+ utility1 = new HBaseTestingUtility(conf1);
+
+ utility1.startMiniZKCluster();
+ miniZK = utility1.getZkCluster();
+ // By setting the mini ZK cluster through this method, even though this is
+ // already utility1's mini ZK cluster, we are telling utility1 not to shut
+ // the mini ZK cluster when we shut down the HBase cluster.
+ utility1.setZkCluster(miniZK);
+ new ZooKeeperWatcher(conf1, "cluster1", null, true);
+
+ conf2 = new Configuration(conf1);
+ conf2.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/2");
+
+ utility2 = new HBaseTestingUtility(conf2);
+ utility2.setZkCluster(miniZK);
+ new ZooKeeperWatcher(conf2, "cluster2", null, true);
+ utility1.startMiniMapReduceCluster();
+ utility2.startMiniMapReduceCluster();
+ }
+
+ @After
+ public void testTeardown() throws Exception {
+ utility1.shutdownMiniCluster();
+ utility2.shutdownMiniCluster();
+ }
+
+ @AfterClass
+ public static void tearDown() throws Exception {
+ utility1.shutdownMiniMapReduceCluster();
+ utility2.shutdownMiniMapReduceCluster();
+ }
+
+ /**
+ * Reset the static state of CopyTable. Once CopyTable is refactored (see TODO
+ * in CopyTable.java) to not retain so much static state this helper method
+ * should be eliminated.
+ *
+ * @throws Exception
+ */
+ @Before
+ @After
+ public void resetStaticStateOfCopyTable() {
+ CopyTable.rsClass = null;
+ CopyTable.rsImpl = null;
+ CopyTable.startTime = 0;
+ CopyTable.endTime = 0;
+ CopyTable.versions = -1;
+ CopyTable.tableName = null;
+ CopyTable.newTableName = null;
+ CopyTable.peerAddress = null;
+ CopyTable.families = null;
+ CopyTable.allCells = false;
+ }
+
+ /**
+ * Create a table for use in testing.
+ */
+ public HTable createTestTable(String tableName, Configuration conf)
+ throws MasterNotRunningException, ZooKeeperConnectionException,
+ IOException {
+
+ HTableDescriptor table = new HTableDescriptor(tableName);
+ HColumnDescriptor fam = new HColumnDescriptor(FAMILYA);
+ table.addFamily(fam);
+ new HBaseAdmin(conf).createTable(table);
+ return new HTable(conf, tableName);
+ }
+
+ /**
+ * Tests the CopyTable utility by copying data between tables in two separate
+ * test clusters.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testSimpleCaseWithReplication() throws Exception {
+ HTable sourceTable = createTestTable(SOURCE_TABLE_NAME, conf1);
+ HTable destinationTable = createTestTable(DESTINATION_TABLE_NAME, conf2);
+
+ Put p = new Put(ROW1);
+ p.add(FAMILYA, QUAL, now, QUAL);
+ p.add(FAMILYA, QUAL, now + 1, QUAL);
+ p.add(FAMILYA, QUAL, now + 2, QUAL);
+ sourceTable.put(p);
+ p = new Put(ROW2);
+ p.add(FAMILYA, QUAL, now, QUAL);
+ p.add(FAMILYA, QUAL, now + 1, QUAL);
+ p.add(FAMILYA, QUAL, now + 2, QUAL);
+ sourceTable.put(p);
+
+ String[] args = new String[] {
+ "--peer.adr=" + conf2.get("hbase.zookeeper.quorum") + ":"
+ + conf2.get("hbase.zookeeper.property.clientPort") + ":"
+ + conf2.get("zookeeper.znode.parent"),
+ "--new.name=" + DESTINATION_TABLE_NAME, "--versions=3",
+ "--families=" + FAMILYA_STRING, SOURCE_TABLE_NAME };
+
+ GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
+ conf1), args);
+ Configuration conf = opts.getConfiguration();
+ args = opts.getRemainingArgs();
+
+ Job job = CopyTable.createSubmittableJob(conf, args);
+
+ job.waitForCompletion(false);
+ assertTrue(job.isSuccessful());
+
+ Get g = new Get(ROW1);
+ g.setMaxVersions();
+ Result r = destinationTable.get(g);
+ assertEquals(3, r.size());
+ assertTrue(java.util.Arrays.equals(QUAL, r.getValue(FAMILYA, QUAL)));
+ g = new Get(ROW2);
+ g.setMaxVersions();
+ r = destinationTable.get(g);
+ assertEquals(3, r.size());
+ assertTrue(java.util.Arrays.equals(QUAL, r.getValue(FAMILYA, QUAL)));
+ }
+
+ /**
+ * Tests CopyTable when delete markers are also copied between tables.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testWithDeletes() throws Exception {
+ HTable sourceTable = createTestTable(SOURCE_TABLE_NAME, conf1);
+ HTable destinationTable = createTestTable(DESTINATION_TABLE_NAME, conf2);
+
+ Put p = new Put(ROW1);
+ p.add(FAMILYA, QUAL, now, QUAL);
+ p.add(FAMILYA, QUAL, now + 1, QUAL);
+ p.add(FAMILYA, QUAL, now + 2, QUAL);
+ p.add(FAMILYA, QUAL, now + 3, QUAL);
+ p.add(FAMILYA, QUAL, now + 4, QUAL);
+ sourceTable.put(p);
+
+ Delete d = new Delete(ROW1, now + 3, null);
+ sourceTable.delete(d);
+ d = new Delete(ROW1);
+ d.deleteColumns(FAMILYA, QUAL, now + 2);
+ sourceTable.delete(d);
+
+ String[] args = new String[] {
+ "--peer.adr=" + conf2.get("hbase.zookeeper.quorum") + ":"
+ + conf2.get("hbase.zookeeper.property.clientPort") + ":"
+ + conf2.get("zookeeper.znode.parent"),
+ "--new.name=" + DESTINATION_TABLE_NAME, "--versions=5",
+ "--all.cells=true", SOURCE_TABLE_NAME };
+
+ GenericOptionsParser opts = new GenericOptionsParser(new Configuration(
+ conf1), args);
+ Configuration conf = opts.getConfiguration();
+ args = opts.getRemainingArgs();
+
+ Job job = CopyTable.createSubmittableJob(conf, args);
+ job.waitForCompletion(false);
+ assertTrue(job.isSuccessful());
+
+ Scan s = new Scan();
+ s.setMaxVersions();
+ s.setRaw(true);
+ ResultScanner scanner = destinationTable.getScanner(s);
+ Result r = scanner.next();
+ KeyValue[] res = r.raw();
+ assertTrue(res[0].isDeleteFamily());
+ assertEquals(now + 4, res[1].getTimestamp());
+ assertEquals(now + 3, res[2].getTimestamp());
+ assertTrue(res[3].isDelete());
+ assertEquals(now + 2, res[4].getTimestamp());
+ }
+
+}
\ No newline at end of file