Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1083087) +++ lucene/CHANGES.txt (working copy) @@ -397,6 +397,11 @@ * LUCENE-2936: PhraseQuery score explanations were not correctly identifying matches vs non-matches. (hossman) +* LUCENE-2975: A hotspot bug corrupts IndexInput#readVInt()/readVLong() if + the underlying readByte() is inlined (which happens e.g. in MMapDirectory). + The loop was unwinded which makes the hotspot bug disappear. + (Uwe Schindler, Robert Muir, Mike McCandless) + New features * LUCENE-2128: Parallelized fetching document frequencies during weight Index: lucene/src/java/org/apache/lucene/store/IndexInput.java =================================================================== --- lucene/src/java/org/apache/lucene/store/IndexInput.java (revision 1083087) +++ lucene/src/java/org/apache/lucene/store/IndexInput.java (working copy) @@ -78,6 +78,9 @@ * @see IndexOutput#writeVInt(int) */ public int readVInt() throws IOException { + /* This is the original code of this method, + * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if + * readByte() is inlined. So the loop was unwinded! byte b = readByte(); int i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { @@ -85,6 +88,22 @@ i |= (b & 0x7F) << shift; } return i; + */ + byte b = readByte(); + int i = b & 0x7F; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7F) << 7; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7F) << 14; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7F) << 21; + if ((b & 0x80) == 0) return i; + b = readByte(); + assert (b & 0x80) == 0; + return i | ((b & 0x7F) << 28); } /** Reads eight bytes and returns a long. @@ -98,6 +117,9 @@ * nine bytes. Smaller values take fewer bytes. Negative numbers are not * supported. */ public long readVLong() throws IOException { + /* This is the original code of this method, + * but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if + * readByte() is inlined. So the loop was unwinded! byte b = readByte(); long i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { @@ -105,6 +127,34 @@ i |= (b & 0x7FL) << shift; } return i; + */ + byte b = readByte(); + long i = b & 0x7FL; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 7; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 14; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 21; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 28; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 35; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 42; + if ((b & 0x80) == 0) return i; + b = readByte(); + i |= (b & 0x7FL) << 49; + if ((b & 0x80) == 0) return i; + b = readByte(); + assert (b & 0x80) == 0; + return i | ((b & 0x7FL) << 56); } /** Call this if readString should read characters stored Index: lucene/src/test/org/apache/lucene/index/TestIndexInput.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexInput.java (revision 1083087) +++ lucene/src/test/org/apache/lucene/index/TestIndexInput.java (working copy) @@ -19,51 +19,60 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMDirectory; import java.io.IOException; public class TestIndexInput extends LuceneTestCase { - public void testRead() throws IOException { - IndexInput is = new MockIndexInput(new byte[] { - (byte) 0x80, 0x01, - (byte) 0xFF, 0x7F, - (byte) 0x80, (byte) 0x80, 0x01, - (byte) 0x81, (byte) 0x80, 0x01, - 0x06, 'L', 'u', 'c', 'e', 'n', 'e', - // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") - 0x02, (byte) 0xC2, (byte) 0xBF, - 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF, - 'c', 'e', (byte) 0xC2, (byte) 0xBF, - 'n', 'e', + static final byte[] READ_TEST_BYTES = new byte[] { + (byte) 0x80, 0x01, + (byte) 0xFF, 0x7F, + (byte) 0x80, (byte) 0x80, 0x01, + (byte) 0x81, (byte) 0x80, 0x01, + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07, + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x07, + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0x7F, + 0x06, 'L', 'u', 'c', 'e', 'n', 'e', - // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") - 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0, - 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, - 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, - 'n', 'e', + // 2-byte UTF-8 (U+00BF "INVERTED QUESTION MARK") + 0x02, (byte) 0xC2, (byte) 0xBF, + 0x0A, 'L', 'u', (byte) 0xC2, (byte) 0xBF, + 'c', 'e', (byte) 0xC2, (byte) 0xBF, + 'n', 'e', - // surrogate pairs - // (U+1D11E "MUSICAL SYMBOL G CLEF") - // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE") - 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, - 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, - (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, - 0x0E, 'L', 'u', - (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, - 'c', 'e', - (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, - 'n', 'e', + // 3-byte UTF-8 (U+2620 "SKULL AND CROSSBONES") + 0x03, (byte) 0xE2, (byte) 0x98, (byte) 0xA0, + 0x0C, 'L', 'u', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, + 'c', 'e', (byte) 0xE2, (byte) 0x98, (byte) 0xA0, + 'n', 'e', - // null bytes - 0x01, 0x00, - 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e', - }); - + // surrogate pairs + // (U+1D11E "MUSICAL SYMBOL G CLEF") + // (U+1D160 "MUSICAL SYMBOL EIGHTH NOTE") + 0x04, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, + 0x08, (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, + (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, + 0x0E, 'L', 'u', + (byte) 0xF0, (byte) 0x9D, (byte) 0x84, (byte) 0x9E, + 'c', 'e', + (byte) 0xF0, (byte) 0x9D, (byte) 0x85, (byte) 0xA0, + 'n', 'e', + + // null bytes + 0x01, 0x00, + 0x08, 'L', 'u', 0x00, 'c', 'e', 0x00, 'n', 'e', + }; + + private void checkReads(IndexInput is) throws IOException { assertEquals(128,is.readVInt()); assertEquals(16383,is.readVInt()); assertEquals(16384,is.readVInt()); assertEquals(16385,is.readVInt()); + assertEquals(Integer.MAX_VALUE, is.readVInt()); + assertEquals((long) Integer.MAX_VALUE, is.readVLong()); + assertEquals(Long.MAX_VALUE, is.readVLong()); assertEquals("Lucene",is.readString()); assertEquals("\u00BF",is.readString()); @@ -80,6 +89,25 @@ assertEquals("Lu\u0000ce\u0000ne",is.readString()); } + // this test only checks BufferedIndexInput because MockIndexInput extends BufferedIndexInput + public void testBufferedIndexInputRead() throws IOException { + final IndexInput is = new MockIndexInput(READ_TEST_BYTES); + checkReads(is); + is.close(); + } + + // this test checks the raw IndexInput methods as it uses RAMIndexInput which extends IndexInput directly + public void testRawIndexInputRead() throws IOException { + final RAMDirectory dir = new RAMDirectory(); + final IndexOutput os = dir.createOutput("foo"); + os.writeBytes(READ_TEST_BYTES, READ_TEST_BYTES.length); + os.close(); + final IndexInput is = dir.openInput("foo"); + checkReads(is); + is.close(); + dir.close(); + } + /** * Expert *