Index: hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java
===================================================================
--- hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java (revision 1552301)
+++ hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java (working copy)
@@ -51,14 +51,17 @@
  *
  * Then the test creates a snapshot from this table, and overrides the values in the original
  * table with values 'after_snapshot_value'. The test, then runs a mapreduce job over the snapshot
- * with a scan start row 'bbb' and stop row 'yyy'. The data is saved in a single reduce output file, and
+ * with a scan start row 'bbb' and stop row 'yyy'. The data is saved in a single reduce output
+ * file, and
  * inspected later to verify that the MR job has seen all the values from the snapshot.
  *
  * <br><br> These parameters can be used to configure the job:
  * <br> "IntegrationTestTableSnapshotInputFormat.table" => the name of the table
  * <br> "IntegrationTestTableSnapshotInputFormat.snapshot" => the name of the snapshot
- * <br> "IntegrationTestTableSnapshotInputFormat.numRegions" => number of regions in the table to be created
- * <br> "IntegrationTestTableSnapshotInputFormat.tableDir" => temporary directory to restore the snapshot files
+ * <br> "IntegrationTestTableSnapshotInputFormat.numRegions" => number of regions in the table
+ * to be created
+ * <br> "IntegrationTestTableSnapshotInputFormat.tableDir" => temporary directory to restore the
+ * snapshot files
  *
  */
 @Category(IntegrationTests.class)
@@ -70,10 +73,11 @@
 
   private static final String TABLE_NAME_KEY = "IntegrationTestTableSnapshotInputFormat.table";
   private static final String DEFAULT_TABLE_NAME = "IntegrationTestTableSnapshotInputFormat";
 
-  private static final String SNAPSHOT_NAME_KEY = "IntegrationTestTableSnapshotInputFormat.snapshot";
+  private static final String SNAPSHOT_NAME_KEY =
+      "IntegrationTestTableSnapshotInputFormat.snapshot";
+  private static final String NUM_REGIONS_KEY =
+      "IntegrationTestTableSnapshotInputFormat.numRegions";
-
-  private static final String NUM_REGIONS_KEY = "IntegrationTestTableSnapshotInputFormat.numRegions";
   private static final int DEFAULT_NUM_REGIONS = 32;
 
   private static final String TABLE_DIR_KEY = "IntegrationTestTableSnapshotInputFormat.tableDir";
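For reference, the four configuration keys documented in the IntegrationTestTableSnapshotInputFormat
Javadoc above could be supplied as in the sketch below. This is not part of the patch: it assumes the
integration test is launched through ToolRunner the way other HBase integration tests are, and the
table name, snapshot name and restore directory are illustrative placeholders.

// Sketch only: wiring the documented keys before launching the integration test.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.IntegrationTestTableSnapshotInputFormat;
import org.apache.hadoop.util.ToolRunner;

public class LaunchSnapshotInputFormatIT {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Keys quoted from the Javadoc above; the values are placeholders.
    conf.set("IntegrationTestTableSnapshotInputFormat.table", "MyTestTable");
    conf.set("IntegrationTestTableSnapshotInputFormat.snapshot", "MyTestTable_snapshot");
    conf.setInt("IntegrationTestTableSnapshotInputFormat.numRegions", 32);
    conf.set("IntegrationTestTableSnapshotInputFormat.tableDir", "/tmp/snapshot_it_restore");
    int ret = ToolRunner.run(conf, new IntegrationTestTableSnapshotInputFormat(), args);
    System.exit(ret);
  }
}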
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java (revision 1553226)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java (working copy)
@@ -193,7 +193,8 @@
     addCommandDescriptor(ScanTest.class, "scan",
         "Run scan test (read every row)");
     addCommandDescriptor(FilteredScanTest.class, "filterScan",
-        "Run scan test using a filter to find a specific row based on it's value (make sure to use --rows=20)");
+        "Run scan test using a filter to find a specific row based on it's value " +
+        "(make sure to use --rows=20)");
   }
 
   protected void addCommandDescriptor(Class<? extends Test> cmdClass,
@@ -1584,13 +1585,13 @@
         this.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
         continue;
       }
-      
+
       final String noOfTags = "--nooftags=";
       if (cmd.startsWith(noOfTags)) {
         this.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
         continue;
       }
-      
+
       Class<? extends Test> cmdClass = determineCommandClass(cmd);
       if (cmdClass != null) {
         getArgs(i + 1, args);
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java (revision 1553226)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/rest/PerformanceEvaluation.java (working copy)
@@ -190,7 +190,8 @@
     addCommandDescriptor(ScanTest.class, "scan",
         "Run scan test (read every row)");
     addCommandDescriptor(FilteredScanTest.class, "filterScan",
-        "Run scan test using a filter to find a specific row based on it's value (make sure to use --rows=20)");
+        "Run scan test using a filter to find a specific row based " +
+        "on it's value (make sure to use --rows=20)");
   }
 
   protected void addCommandDescriptor(Class<? extends Test> cmdClass,
@@ -1329,7 +1330,8 @@
     }
     System.err.println("Usage: java " + this.getClass().getName() + " \\");
     System.err.println("  [--nomapred] [--rows=ROWS] [--table=NAME] \\");
-    System.err.println("  [--compress=TYPE] [--blockEncoding=TYPE] [-D<property=value>]* <command> <nclients>");
+    System.err.println("  [--compress=TYPE] [--blockEncoding=TYPE] " +
+      "[-D<property=value>]* <command> <nclients>");
     System.err.println();
     System.err.println("Options:");
     System.err.println(" nomapred        Run multiple clients using threads " +
@@ -1337,15 +1339,17 @@
     System.err.println(" rows            Rows each client runs. Default: One million");
     System.err.println(" table           Alternate table name. Default: 'TestTable'");
     System.err.println(" compress        Compression type to use (GZ, LZO, ...). Default: 'NONE'");
-    System.err.println(" flushCommits    Used to determine if the test should flush the table. Default: false");
+    System.err.println(" flushCommits    Used to determine if the test should flush the table. " +
+      "Default: false");
     System.err.println(" writeToWAL      Set writeToWAL on puts. Default: True");
-    System.err.println(" presplit        Create presplit table. Recommended for accurate perf analysis (see guide). Default: disabled");
-    System.err
-        .println(" inmemory        Tries to keep the HFiles of the CF inmemory as far as possible. Not " +
-            "guaranteed that reads are always served from inmemory. Default: false");
-    System.err.println(" usetags         Writes tags along with KVs. Use with HFile V3. Default : false");
-    System.err
-        .println(" numoftags       Specify the no of tags that would be needed. This works only if usetags is true.");
+    System.err.println(" presplit        Create presplit table. Recommended for accurate perf " +
+      "analysis (see guide). Default: disabled");
+    System.err.println(" inmemory        Tries to keep the HFiles of the CF inmemory as far as " +
+      "possible. Not guaranteed that reads are always served from inmemory. Default: false");
+    System.err.println(" usetags         Writes tags along with KVs. Use with HFile V3. " +
+      "Default : false");
+    System.err.println(" numoftags       Specify the no of tags that would be needed. " +
+      "This works only if usetags is true.");
     System.err.println();
     System.err.println(" Note: -D properties will be applied to the conf used. ");
     System.err.println(" For example: ");
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java (revision 1553226)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java (working copy)
@@ -120,15 +120,18 @@
     Assert.assertEquals(Lists.newArrayList("h1"), tsif.getBestLocations(conf, blockDistribution));
 
     blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
-    Assert.assertEquals(Lists.newArrayList("h1", "h2"), tsif.getBestLocations(conf, blockDistribution));
+    Assert.assertEquals(Lists.newArrayList("h1", "h2"),
+      tsif.getBestLocations(conf, blockDistribution));
 
     blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
-    Assert.assertEquals(Lists.newArrayList("h2", "h1"), tsif.getBestLocations(conf, blockDistribution));
+    Assert.assertEquals(Lists.newArrayList("h2", "h1"),
+      tsif.getBestLocations(conf, blockDistribution));
 
     blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
     blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);
-    Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"), tsif.getBestLocations(conf, blockDistribution));
+    Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
+      tsif.getBestLocations(conf, blockDistribution));
   }
 
   public static enum TestTableSnapshotCounters {
@@ -148,7 +151,8 @@
 
   public static class TestTableSnapshotReducer extends
       Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
-    HBaseTestingUtility.SeenRowTracker rowTracker = new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
+    HBaseTestingUtility.SeenRowTracker rowTracker =
+        new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
     @Override
     protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
        Context context) throws IOException, InterruptedException {
@@ -207,8 +211,8 @@
     testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
   }
 
-  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName, int numRegions, int expectedNumSplits)
-      throws Exception {
+  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
+      int numRegions, int expectedNumSplits) throws Exception {
     setupCluster();
     TableName tableName = TableName.valueOf("testWithMockedMapReduce");
     try {
@@ -239,7 +243,8 @@
 
       Assert.assertEquals(expectedNumSplits, splits.size());
 
-      HBaseTestingUtility.SeenRowTracker rowTracker = new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
+      HBaseTestingUtility.SeenRowTracker rowTracker =
+          new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
 
       for (int i = 0; i < splits.size(); i++) {
         // validate input split
@@ -249,7 +254,8 @@
         // validate record reader
         TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
         when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
-        RecordReader<ImmutableBytesWritable, Result> rr = tsif.createRecordReader(split, taskAttemptContext);
+        RecordReader<ImmutableBytesWritable, Result> rr =
+            tsif.createRecordReader(split, taskAttemptContext);
         rr.initialize(split, taskAttemptContext);
 
         // validate we can read all the data back
@@ -266,7 +272,8 @@
     rowTracker.validate();
   }
 
-  public static void verifyRowFromMap(ImmutableBytesWritable key, Result result) throws IOException {
+  public static void verifyRowFromMap(ImmutableBytesWritable key, Result result)
+      throws IOException {
     byte[] row = key.get();
     CellScanner scanner = result.cellScanner();
     while (scanner.advance()) {
@@ -317,8 +324,8 @@
 
   // this is also called by the IntegrationTestTableSnapshotInputFormat
   public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
-      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits, boolean shutdownCluster)
-      throws Exception {
+      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+      boolean shutdownCluster) throws Exception {
     //create the table and snapshot
     createTableAndSnapshot(util, tableName, snapshotName, numRegions);
 
@@ -333,7 +340,8 @@
       Scan scan = new Scan(bbb, yyy); // limit the scan
 
       job.setJarByClass(util.getClass());
-      TableMapReduceUtil.addDependencyJars(job.getConfiguration(), TestTableSnapshotInputFormat.class);
+      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
+        TestTableSnapshotInputFormat.class);
 
       TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
           scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java (revision 1553226)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java (working copy)
@@ -124,8 +124,8 @@
     testScanner(UTIL, "testWithMultiRegion", 20, true);
   }
 
-  private void testScanner(HBaseTestingUtility util, String snapshotName, int numRegions, boolean shutdownCluster)
-      throws Exception {
+  private void testScanner(HBaseTestingUtility util, String snapshotName, int numRegions,
+      boolean shutdownCluster) throws Exception {
     setupCluster();
     TableName tableName = TableName.valueOf("testScanner");
     try {
@@ -138,7 +138,8 @@
 
       Path restoreDir = util.getDataTestDirOnTestFS(snapshotName);
       Scan scan = new Scan(bbb, yyy); // limit the scan
-      TableSnapshotScanner scanner = new TableSnapshotScanner(UTIL.getConfiguration(), restoreDir, snapshotName, scan);
+      TableSnapshotScanner scanner = new TableSnapshotScanner(UTIL.getConfiguration(), restoreDir,
+        snapshotName, scan);
 
       verifyScanner(scanner, bbb, yyy);
       scanner.close();
@@ -154,7 +155,8 @@
 
   private void verifyScanner(ResultScanner scanner, byte[] startRow, byte[] stopRow)
       throws IOException, InterruptedException {
-    HBaseTestingUtility.SeenRowTracker rowTracker = new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
+    HBaseTestingUtility.SeenRowTracker rowTracker =
+        new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);
 
     while (true) {
       Result result = scanner.next();
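The test above exercises the TableSnapshotScanner constructor directly. For readers of the patch, a
minimal client-side usage looks roughly like the sketch below; it is not part of the patch, and the
snapshot name, restore directory and row range are illustrative values, mirroring what testScanner()
does with 'bbb' and 'yyy'.

// Sketch only: client-side scan over a snapshot, outside of MapReduce.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableSnapshotScanner;
import org.apache.hadoop.hbase.util.Bytes;

public class SnapshotScanExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Path restoreDir = new Path("/tmp/snapshot-restore");              // scratch dir, placeholder
    Scan scan = new Scan(Bytes.toBytes("bbb"), Bytes.toBytes("yyy")); // limit the scan, as in the test
    TableSnapshotScanner scanner =
        new TableSnapshotScanner(conf, restoreDir, "mySnapshot", scan);
    try {
      for (Result result : scanner) {
        System.out.println(Bytes.toStringBinary(result.getRow()));
      }
    } finally {
      scanner.close();
    }
  }
}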

Index: hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java (revision 1552301)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java (working copy)
@@ -82,7 +82,8 @@
  * while there are jobs reading from snapshot files.
  * <br><br>
  * Usage is similar to TableInputFormat, and
- * {@link TableMapReduceUtil#initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job, boolean, Path)}
+ * {@link TableMapReduceUtil#initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job,
+ * boolean, Path)}
  * can be used to configure the job.
  * <pre>{@code
  * Job job = new Job(conf);
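The {@code} example above is cut off at the hunk boundary. A complete job setup, based on the
initTableSnapshotMapperJob(String, Scan, Class, Class, Class, Job, boolean, Path) signature that the
Javadoc references, looks roughly like the sketch below. It is not part of the patch; the mapper, the
snapshot name and the restore path are placeholders.

// Sketch only: a runnable MapReduce job that reads from a snapshot.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class SnapshotMapReduceExample {

  // Placeholder mapper: just emits the row key of every row it sees.
  public static class RowKeyMapper extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      context.write(key, NullWritable.get());
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = new Job(conf, "snapshot-scan");                // Job(conf), as in the Javadoc example
    Scan scan = new Scan();                                  // optionally restrict rows/columns here
    Path tmpRestoreDir = new Path("/tmp/snapshot-restore");  // scratch dir, placeholder value

    TableMapReduceUtil.initTableSnapshotMapperJob(
        "mySnapshot",                 // snapshot to read (placeholder name)
        scan,
        RowKeyMapper.class,
        ImmutableBytesWritable.class, // map output key class
        NullWritable.class,           // map output value class
        job,
        true,                         // ship HBase dependency jars with the job
        tmpRestoreDir);               // where the snapshot is restored for reading

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}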
@@ -100,12 +101,13 @@
  * </pre>
  * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
  * snapshot files and data files. HBase also enforces security because all the requests are handled
- * by the server layer, and the user cannot read from the data files directly. To read from snapshot
- * files directly from the file system, the user who is running the MR job must have sufficient
- * permissions to access snapshot and reference files. This means that to run mapreduce over
- * snapshot files, the MR job has to be run as the HBase user or the user must have group or other
- * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
- * snapshot/data files will completely circumvent the access control enforced by HBase.
+ * by the server layer, and the user cannot read from the data files directly.
+ * To read from snapshot files directly from the file system, the user who is running the MR job
+ * must have sufficient permissions to access snapshot and reference files.
+ * This means that to run mapreduce over snapshot files, the MR job has to be run as the HBase
+ * user or the user must have group or other priviledges in the filesystem (See HBASE-8369).
+ * Note that, given other users access to read from snapshot/data files will completely circumvent
+ * the access control enforced by HBase.
  * @see TableSnapshotScanner
  */
 @InterfaceAudience.Public
@@ -117,7 +119,8 @@
   private static final Log LOG = LogFactory.getLog(TableSnapshotInputFormat.class);
 
   /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
-  private static final String LOCALITY_CUTOFF_MULTIPLIER = "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
+  private static final String LOCALITY_CUTOFF_MULTIPLIER =
+    "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
   private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
 
   private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
@@ -177,7 +180,8 @@
       int len = in.readInt();
       byte[] buf = new byte[len];
       in.readFully(buf);
-      MapReduceProtos.TableSnapshotRegionSplit split = MapReduceProtos.TableSnapshotRegionSplit.PARSER.parseFrom(buf);
+      MapReduceProtos.TableSnapshotRegionSplit split =
+          MapReduceProtos.TableSnapshotRegionSplit.PARSER.parseFrom(buf);
       this.regionName = Bytes.toString(split.getRegion().getValue().toByteArray());
       List<String> locationsList = split.getLocationsList();
       this.locations = locationsList.toArray(new String[locationsList.size()]);
@@ -185,7 +189,8 @@
   }
 
   @VisibleForTesting
-  static class TableSnapshotRegionRecordReader extends RecordReader<ImmutableBytesWritable, Result> {
+  static class TableSnapshotRegionRecordReader extends
+      RecordReader<ImmutableBytesWritable, Result> {
     private TableSnapshotRegionSplit split;
     private Scan scan;
     private Result result = null;
@@ -223,8 +228,9 @@
         throw new IllegalArgumentException("A Scan is not configured for this job");
       }
       scan = TableMapReduceUtil.convertStringToScan(scanStr);
-      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED); // region is immutable, this should be fine,
-                                                                // otherwise we have to set the thread read point
+      // region is immutable, this should be fine,
+      // otherwise we have to set the thread read point
+      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
 
       scanner = new ClientSideRegionScanner(conf, fs, tmpRootDir, htd, hri, scan, null);
       if (context != null) {
@@ -336,7 +342,8 @@
    * weights into account, thus will treat every location passed from the input split as equal. We
    * do not want to blindly pass all the locations, since we are creating one split per region, and
    * the region's blocks are all distributed throughout the cluster unless favorite node assignment
-   * is used. On the expected stable case, only one location will contain most of the blocks as local.
+   * is used. On the expected stable case, only one location will contain most of the blocks as
+   * local.
    * On the other hand, in favored node assignment, 3 nodes will contain highly local blocks. Here
    * we are doing a simple heuristic, where we will pass all hosts which have at least 80%
    * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) as much block locality as the top
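The Javadoc above describes the cutoff heuristic that the getBestLocations() assertions earlier in
this patch exercise. The sketch below writes that heuristic out in isolation; it is an illustration of
the documented behaviour (hosts sorted by block weight, keeping every host within the cutoff
multiplier of the best one), not a copy of the actual getBestLocations(Configuration,
HDFSBlocksDistribution) implementation, and the class and method names are placeholders.

// Sketch only: the 80% locality cutoff heuristic described in the Javadoc above.
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

public class LocalityCutoffSketch {

  public static List<String> bestLocations(Map<String, Long> blockWeightByHost,
      float cutoffMultiplier) {
    List<Map.Entry<String, Long>> hosts =
        new ArrayList<Map.Entry<String, Long>>(blockWeightByHost.entrySet());
    Collections.sort(hosts, new Comparator<Map.Entry<String, Long>>() {
      @Override
      public int compare(Map.Entry<String, Long> a, Map.Entry<String, Long> b) {
        return Long.compare(b.getValue(), a.getValue()); // highest block weight first
      }
    });

    List<String> locations = new ArrayList<String>();
    if (hosts.isEmpty()) {
      return locations;                                   // no blocks, no preferred hosts
    }
    // keep every host with at least cutoffMultiplier (0.8 by default) of the top host's locality
    double threshold = hosts.get(0).getValue() * cutoffMultiplier;
    for (Map.Entry<String, Long> host : hosts) {
      if (host.getValue() >= threshold) {
        locations.add(host.getKey());
      }
    }
    return locations;
  }
}

With weights h1=10, h2=12, h3=11, h4=10 and a 0.8 cutoff this yields [h2, h3, h4, h1], which matches
the ordering asserted in the TestTableSnapshotInputFormat hunk earlier in the patch.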
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java (revision 1552301)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java (working copy)
@@ -674,10 +674,14 @@
     System.err.println("  -snapshot NAME          Snapshot to restore.");
     System.err.println("  -copy-to NAME           Remote destination hdfs://");
     System.err.println("  -no-checksum-verify     Do not verify checksum.");
-    System.err.println("  -chuser USERNAME        Change the owner of the files to the specified one.");
-    System.err.println("  -chgroup GROUP          Change the group of the files to the specified one.");
-    System.err.println("  -chmod MODE             Change the permission of the files to the specified one.");
-    System.err.println("  -mappers                Number of mappers to use during the copy (mapreduce.job.maps).");
+    System.err.println("  -chuser USERNAME        Change the owner of the files " +
+        "to the specified one.");
+    System.err.println("  -chgroup GROUP          Change the group of the files to " +
+        "the specified one.");
+    System.err.println("  -chmod MODE             Change the permission of the files " +
+        "to the specified one.");
+    System.err.println("  -mappers                Number of mappers to use during the " +
+        "copy (mapreduce.job.maps).");
     System.err.println();
     System.err.println("Examples:");
     System.err.println("  hbase " + getClass() + " \\");
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java (revision 1552301)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java (working copy)
@@ -48,7 +48,8 @@
   List<Cell> values;
 
   public ClientSideRegionScanner(Configuration conf, FileSystem fs,
-      Path rootDir, HTableDescriptor htd, HRegionInfo hri, Scan scan, ScanMetrics scanMetrics) throws IOException {
+      Path rootDir, HTableDescriptor htd, HRegionInfo hri, Scan scan, ScanMetrics scanMetrics)
+      throws IOException {
     this.scan = scan;