From 9c6c027f7ac9e9970f4c0ac6557482bfb96f6a5f Mon Sep 17 00:00:00 2001
From: rahulgidwani
Date: Tue, 20 Jan 2015 15:08:53 -0800
Subject: [PATCH] Add scanner caching and batching options for the CopyTable job.

---
Review notes (commentary only; text between the "---" line and the diff is
not part of the commit message):
 * The patch had been collapsed onto two physical lines; one diff line per
   physical line is restored here. Hunk line counts were re-checked against
   the "@@" headers and the diffstat (26 insertions, 1 deletion).
 * Leading whitespace was reconstructed assuming two-space indentation and
   runs of spaces inside the usage string literals are kept as received, so
   context lines may not match the target file byte-for-byte. Apply with
   whitespace tolerance (e.g. "git apply --ignore-whitespace") - TODO confirm
   against the original patch.
 * "if(" was changed to "if (" in the two added scan guards to match the
   neighbouring "if (startRow != null)". The post-image blob id on the
   "index" line predates that change.
 * --scannerCaching / --scanBatchSize are only applied to the Scan when the
   parsed value is greater than zero; both fields default to -1.

 .../apache/hadoop/hbase/mapreduce/CopyTable.java   | 27 +++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
index 7584bc2..8c612f2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -66,6 +66,8 @@ public class CopyTable extends Configured implements Tool {
   String families = null;
   boolean allCells = false;
   static boolean shuffle = false;
+  int scannerCaching = -1;
+  int scannerBatching = -1;
 
   boolean bulkload = false;
   Path bulkloadDir = null;
@@ -106,6 +108,14 @@ public class CopyTable extends Configured implements Tool {
       scan.setMaxVersions(versions);
     }
 
+    if (scannerCaching > 0) {
+      scan.setCaching(scannerCaching);
+    }
+
+    if (scannerBatching > 0) {
+      scan.setBatch(scannerBatching);
+    }
+
     if (startRow != null) {
       scan.setStartRow(Bytes.toBytes(startRow));
     }
@@ -179,7 +189,8 @@ public class CopyTable extends Configured implements Tool {
       System.err.println("ERROR: " + errorMsg);
     }
     System.err.println("Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] " +
-        "[--new.name=NEW] [--peer.adr=ADR] ");
+        "[--new.name=NEW] [--peer.adr=ADR] [--startRow=X] [--stopRow=Y] [--scannerCaching=X] [--scanBatchSize=X] ");
+
     System.err.println();
     System.err.println("Options:");
     System.err.println(" rs.class hbase.regionserver.class of the peer cluster");
@@ -190,6 +201,8 @@ public class CopyTable extends Configured implements Tool {
     System.err.println(" starttime beginning of the time range (unixtime in millis)");
     System.err.println(" without endtime means from starttime to forever");
     System.err.println(" endtime end of the time range. Ignored if no starttime specified.");
+    System.err.println(" scannerCaching rows to cache in the scan");
+    System.err.println(" scanBatchSize batch size of calls to next in the scan");
     System.err.println(" versions number of cell versions to copy");
     System.err.println(" new.name new table's name");
     System.err.println(" peer.adr Address of the peer cluster given in the format");
@@ -297,6 +310,18 @@ public class CopyTable extends Configured implements Tool {
           continue;
         }
 
+        final String scannerCachingKey = "--scannerCaching=";
+        if (cmd.startsWith(scannerCachingKey)) {
+          scannerCaching = Integer.parseInt(cmd.substring(scannerCachingKey.length()));
+          continue;
+        }
+
+        final String scanBatchSizeKey = "--scanBatchSize=";
+        if (cmd.startsWith(scanBatchSizeKey)) {
+          scannerBatching = Integer.parseInt(cmd.substring(scanBatchSizeKey.length()));
+          continue;
+        }
+
         if (i == args.length-1) {
           tableName = cmd;
         } else {
-- 
2.1.0