commit 1e9908b56df842967cd1f3b0b3eca78c43e767b2 Author: Nicolas Spiegelberg Date: 5 weeks ago HBASE-4783 Improve RowCounter to count rows in a specific key range. Summary: Currently RowCounter in MR package is a very simple map only job, and this change should let user specify a key range and count the number of rows in this range. It's done by specifying as arguments "--range=foo,bar" to the program loader, which would count between ["foo", "bar") that has any column. Test Plan: This change is actually a modification used to test diff D295523. First I loaded rows using recovery utility with specified key range (feature added in D295523) and let this counter count the same range specified in recovery, and check if total num of rows equals to num of rows in range. Then, I tried setting rowcounter.start.key to the end key of that specified in the previous recovery, and check if there is indeed no row in range. diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java index de58ad2..1729c47 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java +++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java @@ -81,21 +81,42 @@ public class RowCounter { public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0]; - Job job = new Job(conf, NAME + "_" + tableName); - job.setJarByClass(RowCounter.class); - // Columns are space delimited + String startKey = null; + String endKey = null; StringBuilder sb = new StringBuilder(); - final int columnoffset = 1; - for (int i = columnoffset; i < args.length; i++) { - if (i > columnoffset) { + + // First argument is table name, starting from second + for (int i = 1; i < args.length; i++) { + final String rangeSwitch = "--range="; + if (args[i].startsWith(rangeSwitch)) { + String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2); + if (startEnd.length != 2 || startEnd[1].contains(",")) { + printUsage("Please specify range in such format as \"--range=a,b\" " + + "or, with only one boundary, \"--range=,b\" or \"--range=a,\""); + return null; + } + startKey = startEnd[0]; + endKey = startEnd[1]; + } + else { + // if no switch, assume column names + sb.append(args[i]); sb.append(" "); } - sb.append(args[i]); } + + Job job = new Job(conf, NAME + "_" + tableName); + job.setJarByClass(RowCounter.class); Scan scan = new Scan(); + if (startKey != null && !startKey.equals("")) { + scan.setStartRow(Bytes.toBytes(startKey)); + } + if (endKey != null && !endKey.equals("")) { + scan.setStopRow(Bytes.toBytes(endKey)); + } scan.setFilter(new FirstKeyOnlyFilter()); if (sb.length() > 0) { - for (String columnName :sb.toString().split(" ")) { + for (String columnName : sb.toString().trim().split(" ")) { String [] fields = columnName.split(":"); if(fields.length == 1) { scan.addFamily(Bytes.toBytes(fields[0])); @@ -104,7 +125,6 @@ public class RowCounter { } } } - // Second argument is the table name. job.setOutputFormatClass(NullOutputFormat.class); TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); @@ -112,6 +132,22 @@ public class RowCounter { return job; } + /* + * @param errorMessage Can attach a message when error occurs. + */ + private static void printUsage(String errorMessage) { + System.err.println("ERROR: " + errorMessage); + printUsage(); + } + + /* + * Prints usage without error message + */ + private static void printUsage() { + System.err.println("Usage: RowCounter " + + "[--range=[startKey],[endKey]] [ ...]"); + } + /** * Main entry point. * @@ -122,11 +158,13 @@ public class RowCounter { Configuration conf = HBaseConfiguration.create(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 1) { - System.err.println("ERROR: Wrong number of parameters: " + args.length); - System.err.println("Usage: RowCounter [ ...]"); + printUsage("Wrong number of parameters: " + args.length); System.exit(-1); } Job job = createSubmittableJob(conf, otherArgs); + if (job == null) { + System.exit(-1); + } System.exit(job.waitForCompletion(true) ? 0 : 1); } } diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java index 468e38d..7ebd62f 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java +++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java @@ -133,7 +133,6 @@ extends InputFormat { keys.getFirst().length == 0) { throw new IOException("Expecting at least one region."); } - int count = 0; List splits = new ArrayList(keys.getFirst().length); for (int i = 0; i < keys.getFirst().length; i++) { if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) { @@ -158,8 +157,9 @@ extends InputFormat { InputSplit split = new TableSplit(table.getTableName(), splitStart, splitStop, regionLocation); splits.add(split); - if (LOG.isDebugEnabled()) - LOG.debug("getSplits: split -> " + (count++) + " -> " + split); + if (LOG.isDebugEnabled()) { + LOG.debug("getSplits: split -> " + i + " -> " + split); + } } } return splits;