Index: src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java =================================================================== --- src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java (working copy) @@ -248,7 +248,7 @@ @Override void setUp() throws Exception { super.setUp(); - this.scanner = this.reader.getScanner(); + this.scanner = this.reader.getScanner(false, false); this.scanner.seekTo(); } @@ -280,7 +280,7 @@ @Override void doRow(int i) throws Exception { - HFileScanner scanner = this.reader.getScanner(); + HFileScanner scanner = this.reader.getScanner(false, true); byte [] b = getRandomRow(); scanner.seekTo(b); ByteBuffer k = scanner.getKey(); @@ -304,7 +304,7 @@ @Override void doRow(int i) throws Exception { - HFileScanner scanner = this.reader.getScanner(); + HFileScanner scanner = this.reader.getScanner(false, false); byte [] b = getRandomRow(); if (scanner.seekTo(b) != 0) { System.out.println("Nonexistent row: " + new String(b)); @@ -338,7 +338,7 @@ @Override void doRow(int i) throws Exception { - HFileScanner scanner = this.reader.getScanner(); + HFileScanner scanner = this.reader.getScanner(false, true); scanner.seekTo(getGaussianRandomRowBytes()); for (int ii = 0; ii < 30; ii++) { if (!scanner.next()) { Index: src/test/org/apache/hadoop/hbase/regionserver/TestGetDeleteTracker.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestGetDeleteTracker.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/regionserver/TestGetDeleteTracker.java (working copy) @@ -252,10 +252,8 @@ dt.add(del.buffer, del.qualifierOffset, del.qualifierLength, del.timestamp, del.type); } - //update() dt.update(); - assertEquals(false, dt.isDeleted(col2, 0, col2Len, ts3)); assertEquals(false, dt.isDeleted(col2, 0, col2Len, ts1)); } Index: 
src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java (working copy) @@ -208,7 +208,7 @@ boolean containsStartRow = false; for (StoreFile f: this.r.stores.get(COLUMN_FAMILY_TEXT).getStorefiles(). values()) { - HFileScanner scanner = f.getReader().getScanner(false); + HFileScanner scanner = f.getReader().getScanner(false, false); scanner.seekTo(); do { byte [] row = scanner.getKeyValue().getRow(); @@ -239,7 +239,7 @@ int count = 0; for (StoreFile f: this.r.stores. get(COLUMN_FAMILY_TEXT).getStorefiles().values()) { - HFileScanner scanner = f.getReader().getScanner(); + HFileScanner scanner = f.getReader().getScanner(false, false); if (!scanner.seekTo()) { continue; } Index: src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (working copy) @@ -127,7 +127,7 @@ StoreFile refHsf = new StoreFile(this.fs, refPath, true, conf, false); // Now confirm that I can read from the reference and that it only gets // keys from top half of the file. - HFileScanner s = refHsf.getReader().getScanner(); + HFileScanner s = refHsf.getReader().getScanner(false, false); for(boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) { ByteBuffer bb = s.getKey(); hsk = HStoreKey.create(bb.array(), bb.arrayOffset(), bb.limit()); @@ -171,7 +171,7 @@ // Now test reading from the top. 
boolean first = true; ByteBuffer key = null; - HFileScanner topScanner = top.getScanner(); + HFileScanner topScanner = top.getScanner(false, false); while ((!topScanner.isSeeked() && topScanner.seekTo()) || (topScanner.isSeeked() && topScanner.next())) { key = topScanner.getKey(); @@ -186,7 +186,7 @@ LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key))); first = true; - HFileScanner bottomScanner = bottom.getScanner(); + HFileScanner bottomScanner = bottom.getScanner(false, false); while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) { previous = bottomScanner.getKey(); @@ -214,7 +214,7 @@ Range.bottom); top = new StoreFile(this.fs, topPath, true, conf, false).getReader(); bottom = new StoreFile(this.fs, bottomPath, true, conf, false).getReader(); - bottomScanner = bottom.getScanner(); + bottomScanner = bottom.getScanner(false, false); int count = 0; while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) { @@ -224,7 +224,7 @@ assertTrue(count == 0); // Now read from the top. 
first = true; - topScanner = top.getScanner(); + topScanner = top.getScanner(false, false); while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) { key = topScanner.getKey(); @@ -259,7 +259,7 @@ top = new StoreFile(this.fs, topPath, true, conf, false).getReader(); bottom = new StoreFile(this.fs, bottomPath, true, conf, false).getReader(); first = true; - bottomScanner = bottom.getScanner(); + bottomScanner = bottom.getScanner(false, false); while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) { key = bottomScanner.getKey(); @@ -279,7 +279,7 @@ assertTrue(Bytes.toString(keyhsk.getRow()).charAt(i) == 'z'); } count = 0; - topScanner = top.getScanner(); + topScanner = top.getScanner(false, false); while ((!topScanner.isSeeked() && topScanner.seekTo()) || (topScanner.isSeeked() && topScanner.next())) { count++; Index: src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java =================================================================== --- src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java (working copy) @@ -160,7 +160,7 @@ KeySampler kSampler = new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), keyLenGen); - HFileScanner scanner = reader.getScanner(); + HFileScanner scanner = reader.getScanner(false, false); BytesWritable key = new BytesWritable(); timer.reset(); timer.start(); Index: src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java =================================================================== --- src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (working copy) @@ -140,7 +140,8 @@ fs.getFileStatus(ncTFile).getLen(), null, false); // Load up the index. reader.loadFileInfo(); - HFileScanner scanner = reader.getScanner(); + // Get a scanner that caches and that does not use pread. 
+ HFileScanner scanner = reader.getScanner(true, false); // Align scanner at start of the file. scanner.seekTo(); readAllRecords(scanner); @@ -203,7 +204,7 @@ .getLen(), null, false); reader.loadFileInfo(); // No data -- this should return false. - assertFalse(reader.getScanner().seekTo()); + assertFalse(reader.getScanner(false, false).seekTo()); someReadingWithMetaBlock(reader); fs.delete(mFile, true); reader.close(); Index: src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java =================================================================== --- src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java (working copy) @@ -77,7 +77,8 @@ List keys = slurp("/Users/ryan/xaa.50k"); - HFileScanner scanner = reader.getScanner(); + // Get a scanner that doesn't cache and that uses pread. + HFileScanner scanner = reader.getScanner(false, true); int count; long totalBytes = 0; int notFound = 0; Index: src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java =================================================================== --- src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java (revision 903511) +++ src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java (working copy) @@ -244,7 +244,7 @@ case 1: default: { - HFileScanner scanner = reader.getScanner(); + HFileScanner scanner = reader.getScanner(false, false); scanner.seekTo(); for (long l=0 ; l args.length) { throw new IllegalArgumentException("must supply the number of clients"); } - N = Integer.parseInt(args[start]); if (N < 1) { throw new IllegalArgumentException("Number of clients must be > 1"); } - // Set total number of rows to write. 
this.R = this.R * N; } Index: src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (revision 903511) +++ src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (working copy) @@ -219,7 +219,8 @@ LOG.warn("StoreFile " + sf + " has null Reader"); continue; } - s.add(r.getScanner(cacheBlocks)); + // Get a scanner that does not use pread. + s.add(r.getScanner(this.cacheBlocks, false)); } List scanners = new ArrayList(s.size()+1); Index: src/java/org/apache/hadoop/hbase/regionserver/Store.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/Store.java (revision 903511) +++ src/java/org/apache/hadoop/hbase/regionserver/Store.java (working copy) @@ -871,8 +871,8 @@ LOG.warn("StoreFile " + filesToCompact.get(i) + " has a null Reader"); continue; } - // Instantiate HFile.Reader.Scanner to not cache blocks - scanners[i] = new StoreFileScanner(r.getScanner(false)); + // Instantiate HFile.Reader.Scanner to not cache blocks and not use pread + scanners[i] = new StoreFileScanner(r.getScanner(false, false)); } // Make the instantiation lazy in case compaction produces no product; i.e. @@ -1114,7 +1114,8 @@ // last key. TODO: Cache last and first key rather than make each time. firstOnRow = new KeyValue(lastKV.getRow(), HConstants.LATEST_TIMESTAMP); } - HFileScanner scanner = r.getScanner(); + // Get a scanner that caches blocks and that uses pread. + HFileScanner scanner = r.getScanner(true, true); // Seek scanner. If can't seek it, return. if (!seekToScanner(scanner, firstOnRow, firstKV)) return; // If we found candidate on firstOnRow, just return. THIS WILL NEVER HAPPEN! 
@@ -1451,7 +1452,8 @@ LOG.warn("StoreFile " + sf + " has a null Reader"); continue; } - storefileScanners.add(r.getScanner()); + // Get a scanner that caches the block and uses pread + storefileScanners.add(r.getScanner(true, true)); } // StoreFileGetScan will handle reading this store's storefiles Index: src/java/org/apache/hadoop/hbase/io/HalfHFileReader.java =================================================================== --- src/java/org/apache/hadoop/hbase/io/HalfHFileReader.java (revision 903511) +++ src/java/org/apache/hadoop/hbase/io/HalfHFileReader.java (working copy) @@ -79,13 +79,8 @@ } @Override - public HFileScanner getScanner() { - return this.getScanner(true); - } - - @Override - public HFileScanner getScanner(boolean cacheBlocks) { - final HFileScanner s = super.getScanner(cacheBlocks); + public HFileScanner getScanner(final boolean cacheBlocks, final boolean pread) { + final HFileScanner s = super.getScanner(cacheBlocks, pread); return new HFileScanner() { final HFileScanner delegate = s; public boolean atEnd = false; @@ -222,7 +217,8 @@ if (top) { return super.getLastKey(); } - HFileScanner scanner = getScanner(); + // Get a scanner that caches the block and that uses pread. + HFileScanner scanner = getScanner(true, true); try { if (scanner.seekBefore(this.splitkey)) { return Bytes.toBytes(scanner.getKey()); Index: src/java/org/apache/hadoop/hbase/io/hfile/HFile.java =================================================================== --- src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (revision 903511) +++ src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (working copy) @@ -782,7 +782,7 @@ public boolean inMemory() { return this.inMemory; } - + /** * Read in the index and file info. * @return A map of fileinfo data. @@ -857,24 +857,16 @@ * Call {@link HFileScanner#seekTo(byte[])} to position an start the read. * There is nothing to clean up in a Scanner. Letting go of your references * to the scanner is sufficient. 
+ * @param pread Use positional read rather than seek+read if true (pread is + * better for random reads, seek+read is better for scanning). + * @param cacheBlocks True if we should cache blocks read in by this scanner. * @return Scanner on this file. */ - public HFileScanner getScanner() { - return new Scanner(this, true); + public HFileScanner getScanner(boolean cacheBlocks, final boolean pread) { + return new Scanner(this, cacheBlocks, pread); } /** - * Create a Scanner on this file. No seeks or reads are done on creation. - * Call {@link HFileScanner#seekTo(byte[])} to position an start the read. - * There is nothing to clean up in a Scanner. Letting go of your references - * to the scanner is sufficient. - * @return Scanner on this file. - */ - public HFileScanner getScanner(boolean cacheBlocks) { - return new Scanner(this, cacheBlocks); - } - - /** * @param key Key to search. * @return Block number of the block containing the key or -1 if not in this * file. @@ -909,7 +901,7 @@ } ByteBuffer buf = decompress(metaIndex.blockOffsets[block], - longToInt(blockSize), metaIndex.blockDataSizes[block]); + longToInt(blockSize), metaIndex.blockDataSizes[block], true); byte [] magic = new byte[METABLOCKMAGIC.length]; buf.get(magic, 0, magic.length); @@ -926,10 +918,13 @@ /** * Read in a file block. * @param block Index of block to read. + * @param pread Use positional read instead of seek+read (positional is + * better for random reads whereas seek+read is better for scanning). * @return Block wrapped in a ByteBuffer. 
* @throws IOException */ - ByteBuffer readBlock(int block, boolean cacheBlock) throws IOException { + ByteBuffer readBlock(int block, boolean cacheBlock, final boolean pread) + throws IOException { if (blockIndex == null) { throw new IOException("Block index not loaded"); } @@ -969,7 +964,8 @@ blockIndex.blockOffsets[block]; } ByteBuffer buf = decompress(blockIndex.blockOffsets[block], - longToInt(onDiskBlockSize), this.blockIndex.blockDataSizes[block]); + longToInt(onDiskBlockSize), this.blockIndex.blockDataSizes[block], + pread); byte [] magic = new byte[DATABLOCKMAGIC.length]; buf.get(magic, 0, magic.length); @@ -999,11 +995,12 @@ * @param offset * @param compressedSize * @param decompressedSize + * @param pread If true, use positional read rather than seek+read. * @return * @throws IOException */ private ByteBuffer decompress(final long offset, final int compressedSize, - final int decompressedSize) + final int decompressedSize, final boolean pread) throws IOException { Decompressor decompressor = null; ByteBuffer buf = null; @@ -1014,7 +1011,8 @@ // bunch of data w/o regard to whether decompressor is coming to end of a // decompression. InputStream is = this.compressAlgo.createDecompressionStream( - new BoundedRangeFileInputStream(this.istream, offset, compressedSize), + new BoundedRangeFileInputStream(this.istream, offset, compressedSize, + pread), decompressor, 0); buf = ByteBuffer.allocate(decompressedSize); IOUtils.readFully(is, buf.array(), 0, buf.capacity()); @@ -1026,7 +1024,7 @@ } return buf; } - + /** * @return First key in the file. May be null if file has no entries. 
*/ @@ -1095,21 +1093,19 @@ private final Reader reader; private ByteBuffer block; private int currBlock; - - private boolean cacheBlocks = false; + private final boolean cacheBlocks; + private final boolean pread; + private int currKeyLen = 0; private int currValueLen = 0; public int blockFetches = 0; - public Scanner(Reader r) { + public Scanner(Reader r, boolean cacheBlocks, final boolean pread) { this.reader = r; - } - - public Scanner(Reader r, boolean cacheBlocks) { - this.reader = r; this.cacheBlocks = cacheBlocks; + this.pread = pread; } public KeyValue getKeyValue() { @@ -1160,7 +1156,7 @@ block = null; return false; } - block = reader.readBlock(currBlock, cacheBlocks); + block = reader.readBlock(this.currBlock, this.cacheBlocks, this.pread); currKeyLen = block.getInt(); currValueLen = block.getInt(); blockFetches++; @@ -1292,7 +1288,7 @@ return true; } currBlock = 0; - block = reader.readBlock(currBlock, cacheBlocks); + block = reader.readBlock(this.currBlock, this.cacheBlocks, this.pread); currKeyLen = block.getInt(); currValueLen = block.getInt(); blockFetches++; @@ -1301,12 +1297,12 @@ private void loadBlock(int bloc) throws IOException { if (block == null) { - block = reader.readBlock(bloc, cacheBlocks); + block = reader.readBlock(bloc, this.cacheBlocks, this.pread); currBlock = bloc; blockFetches++; } else { if (bloc != currBlock) { - block = reader.readBlock(bloc, cacheBlocks); + block = reader.readBlock(bloc, this.cacheBlocks, this.pread); currBlock = bloc; blockFetches++; } else { @@ -1740,7 +1736,7 @@ HFile.Reader reader = new HFile.Reader(fs, file, null, false); Map fileInfo = reader.loadFileInfo(); // scan over file and read key/value's and check if requested - HFileScanner scanner = reader.getScanner(); + HFileScanner scanner = reader.getScanner(false, false); scanner.seekTo(); KeyValue pkv = null; int count = 0; Index: src/java/org/apache/hadoop/hbase/io/hfile/BoundedRangeFileInputStream.java 
=================================================================== --- src/java/org/apache/hadoop/hbase/io/hfile/BoundedRangeFileInputStream.java (revision 903511) +++ src/java/org/apache/hadoop/hbase/io/hfile/BoundedRangeFileInputStream.java (working copy) @@ -36,6 +36,7 @@ private long end; private long mark; private final byte[] oneByte = new byte[1]; + private final boolean pread; /** * Constructor @@ -46,12 +47,13 @@ * Beginning offset of the region. * @param length * Length of the region. + * @param pread If true, use Filesystem positional read rather than seek+read. * * The actual length of the region may be smaller if (off_begin + * length) goes beyond the end of FS input stream. */ public BoundedRangeFileInputStream(FSDataInputStream in, long offset, - long length) { + long length, final boolean pread) { if (offset < 0 || length < 0) { throw new IndexOutOfBoundsException("Invalid offset/length: " + offset + "/" + length); @@ -61,6 +63,7 @@ this.pos = offset; this.end = offset + length; this.mark = -1; + this.pread = pread; } @Override @@ -94,11 +97,14 @@ int n = (int) Math.min(Integer.MAX_VALUE, Math.min(len, (end - pos))); if (n == 0) return -1; int ret = 0; - synchronized (in) { - in.seek(pos); - ret = in.read(b, off, n); + if (this.pread) { + ret = in.read(pos, b, off, n); + } else { + synchronized (in) { + in.seek(pos); + ret = in.read(b, off, n); + } } - // / ret = in.read(pos, b, off, n); if (ret < 0) { end = pos; return -1;