diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java index 044d111..b0c4622 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java @@ -135,8 +135,11 @@ public class CopyTable extends Configured implements Tool { System.err.println(); System.err.println("Options:"); System.err.println(" rs.class hbase.regionserver.class of the peer cluster"); + System.err.println(" defaults to org.apache.hadoop.hbase.ipc.ReplicationRegionInterface"); System.err.println(" specify if different from current cluster"); System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster"); + System.err.println(" defaults to"); + System.err.println(" org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer"); System.err.println(" startrow the start row"); System.err.println(" stoprow the stop row"); System.err.println(" starttime beginning of the time range (unixtime in millis)"); diff --git src/main/docbkx/ops_mgt.xml src/main/docbkx/ops_mgt.xml index 6246fad..e405beb 100644 --- src/main/docbkx/ops_mgt.xml +++ src/main/docbkx/ops_mgt.xml @@ -188,20 +188,50 @@ private static final int ERROR_EXIT_CODE = 4;
Driver - There is a Driver class that is executed by the HBase jar can be used to - invoke frequently accessed utilities. For example, - HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar - -An example program must be given as the first argument. -Valid program names are: - completebulkload: Complete a bulk data load. - copytable: Export a table from local cluster to peer cluster - export: Write table data to HDFS. - import: Import data written by Export. - importtsv: Import data in TSV format. - rowcounter: Count rows in HBase table - verifyrep: Compare the data from tables in two different clusters. WARNING: It doesn't work for incrementColumnValues'd cells since the timestamp is chan + Several frequently-accessed utilities are provided as Driver classes, and executed by + the bin/hbase command. These utilities represent MapReduce jobs which + run on your cluster. They are run in the following way, replacing + UtilityName with the utility you want to run. This command + assumes you have set the environment variable HBASE_HOME to the directory + where HBase is unpacked on your server. + +${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.mapreduce.UtilityName + The following utilities are available: + + + LoadIncrementalHFiles + Complete a bulk data load. + + + CopyTable + Export a table from the local cluster to a peer cluster. + + + Export + Write table data to HDFS. + + + Import + Import data written by a previous export operation. + + + ImportTsv + Import data in TSV format. + + + RowCounter + Count rows in an HBase table. + + + replication.VerifyReplication + Compare the data from tables in two different clusters. WARNING: It + doesn't work for incrementColumnValues'd cells since the timestamp is changed. Note that + this command is in a different package than the others. + + + Each command except RowCounter accepts a single + --help argument to print usage instructions.
@@ -266,66 +296,46 @@ Valid program names are: CopyTable is a utility that can copy part or of all of a table, either to the same cluster or another cluster. The target table must first exist. The usage is as follows: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable [--starttime=X] [--endtime=Y] [--new.name=NEW] [--peer.adr=ADR] tablename - - - Options - - starttime - - Beginning of the time range. Without endtime means starttime to forever. - - - - endtime - - End of the time range. Without endtime means starttime to forever. - - - - versions - - Number of cell versions to copy. - - - - new.name - - New table's name. - - - - peer.adr - - Address of the peer cluster given in the format - hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent - - - - families - - Comma-separated list of ColumnFamilies to copy. - - - - all.cells - - Also copy delete markers and uncollected deleted cells (advanced option). - - - - - Args: - - tablename Name of table to copy. - - - Example of copying 'TestTable' to a cluster that uses replication for a 1 hour - window: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable ---starttime=1265875194289 --endtime=1265878794289 ---peer.adr=server1,server2,server3:2181:/hbase TestTable + +$ ./bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable --help +/bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable --help +Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] [--new.name=NEW] [--peer.adr=ADR] <tablename> + +Options: + rs.class hbase.regionserver.class of the peer cluster, + defaults to org.apache.hadoop.hbase.ipc.ReplicationRegionInterface + specify if different from current cluster + rs.impl hbase.regionserver.impl of the peer cluster, + defaults to + org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer + startrow the start row + stoprow the stop row + starttime beginning of the time range (unixtime in millis) + without endtime means from starttime to forever + endtime 
end of the time range. Ignored if no starttime specified. + versions number of cell versions to copy + new.name new table's name + peer.adr Address of the peer cluster given in the format + hbase.zookeeper.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent + families comma-separated list of families to copy + To copy from cf1 to cf2, give sourceCfName:destCfName. + To keep the same name, just give "cfName" + all.cells also copy delete markers and deleted cells + +Args: + tablename Name of the table to copy + +Examples: + To copy 'TestTable' to a cluster that uses replication for a 1 hour window: + $ bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable --starttime=1265875194289 --endtime=1265878794289 --peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable +For performance consider the following general options: +-Dhbase.client.scanner.caching=100 +-Dmapred.map.tasks.speculative.execution=false + + + ReplicationRegionInterface and ReplicationRegionServer + Scanner Caching Caching for the input Scan is configured via hbase.client.scanner.caching