From ba2ee92ec858ef89cf551db48018b1f5e8652739 Mon Sep 17 00:00:00 2001 From: rahulgidwani Date: Tue, 20 Jan 2015 16:03:54 -0800 Subject: [PATCH] Add scanner caching and batching options for the CopyTable job. --- .../apache/hadoop/hbase/mapreduce/CopyTable.java | 49 +++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java index 27fb93c..0c6c76e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java @@ -63,6 +63,8 @@ public class CopyTable extends Configured implements Tool { String peerAddress = null; String families = null; boolean allCells = false; + int scannerCaching = -1; + int scannerBatching = -1; boolean bulkload = false; Path bulkloadDir = null; @@ -92,6 +94,10 @@ public class CopyTable extends Configured implements Tool { static String families_ = null; @Deprecated static boolean allCells_ = false; + @Deprecated + static int scannerCaching_ = -1; + @Deprecated + static int scannerBatching_ = -1; public CopyTable(Configuration conf) { super(conf); @@ -132,6 +138,12 @@ public class CopyTable extends Configured implements Tool { if (stopRow_ != null) { scan.setStopRow(Bytes.toBytes(stopRow_)); } + if (scannerBatching_ > 0) { + scan.setBatch(scannerBatching_); + } + if (scannerCaching_ > 0) { + scan.setCaching(scannerCaching_); + } if(families_ != null) { String[] fams = families_.split(","); Map cfRenameMap = new HashMap(); @@ -218,6 +230,19 @@ public class CopyTable extends Configured implements Tool { allCells_ = true; continue; } + + final String scannerCachingKey = "--scannerCaching="; + if (cmd.startsWith(scannerCachingKey)) { + scannerCaching_ = Integer.parseInt(cmd.substring(scannerCachingKey.length())); + continue; + } + + final String scanBatchSizeKey = "--scanBatchSize="; + if (cmd.startsWith(scanBatchSizeKey)) { + scannerBatching_ = Integer.parseInt(cmd.substring(scanBatchSizeKey.length())); + continue; + } + if (i == args.length-1) { tableName_ = cmd; } else { @@ -278,6 +303,14 @@ public class CopyTable extends Configured implements Tool { if (stopRow != null) { scan.setStopRow(Bytes.toBytes(stopRow)); } + + if (scannerCaching > 0) { + scan.setCaching(scannerCaching); + } + + if (scannerBatching > 0) { + scan.setBatch(scannerBatching); + } if(families != null) { String[] fams = families.split(","); @@ -331,7 +364,7 @@ public class CopyTable extends Configured implements Tool { Import.Importer.class, null, null, job); TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null, - null); + null); } return job; @@ -356,6 +389,8 @@ public class CopyTable extends Configured implements Tool { System.err.println(" starttime beginning of the time range (unixtime in millis)"); System.err.println(" without endtime means from starttime to forever"); System.err.println(" endtime end of the time range. Ignored if no starttime specified."); + System.err.println(" scannerCaching rows to cache in the scan"); + System.err.println(" scanBatchSize batch size of calls to next in the scan"); System.err.println(" versions number of cell versions to copy"); System.err.println(" new.name new table's name"); System.err.println(" peer.adr Address of the peer cluster given in the format"); @@ -453,6 +488,18 @@ public class CopyTable extends Configured implements Tool { continue; } + final String scannerCachingKey = "--scannerCaching="; + if (cmd.startsWith(scannerCachingKey)) { + scannerCaching = Integer.parseInt(cmd.substring(scannerCachingKey.length())); + continue; + } + + final String scanBatchSizeKey = "--scanBatchSize="; + if (cmd.startsWith(scanBatchSizeKey)) { + scannerBatching = Integer.parseInt(cmd.substring(scanBatchSizeKey.length())); + continue; + } + if (i == args.length-1) { tableName = cmd; } else { -- 2.1.0