Index: src/test/org/apache/lucene/index/TestStressIndexing2.java
===================================================================
--- src/test/org/apache/lucene/index/TestStressIndexing2.java	(revision 926738)
+++ src/test/org/apache/lucene/index/TestStressIndexing2.java	(working copy)
@@ -286,14 +286,20 @@
 
     // create mapping from id2 space to id2 based on idField
     idField = StringHelper.intern(idField);
-    final TermsEnum termsEnum = MultiFields.getFields(r1).terms(idField).iterator();
+    final Fields f1 = MultiFields.getFields(r1);
+    if (f1 == null) {
+      // make sure r2 is empty
+      assertNull(MultiFields.getFields(r2));
+      return;
+    }
+    final TermsEnum termsEnum = f1.terms(idField).iterator();
 
     final Bits delDocs1 = MultiFields.getDeletedDocs(r1);
     final Bits delDocs2 = MultiFields.getDeletedDocs(r2);
 
     Fields fields = MultiFields.getFields(r2);
     if (fields == null) {
-      // make sure r1 is in fract empty (eg has only all
+      // make sure r1 is in fact empty (eg has only all
       // deleted docs):
       DocsEnum docs = null;
       while(termsEnum.next() != null) {
Index: src/test/org/apache/lucene/index/FlexTestUtil.java
===================================================================
--- src/test/org/apache/lucene/index/FlexTestUtil.java	(revision 926738)
+++ src/test/org/apache/lucene/index/FlexTestUtil.java	(working copy)
@@ -110,6 +110,9 @@
 
   private static void testBogusFieldTerms(Random rand, IndexReader r) throws Exception {
     final Fields fields = MultiFields.getFields(r);
+    if (fields == null) {
+      return;
+    }
     for(int i=0;i<10;i++) {
       final String f = "bogus" + rand.nextInt() + "reallybogus";
       Terms terms = fields.terms(f);
Index: src/test/org/apache/lucene/util/packed/TestPackedInts.java
===================================================================
--- src/test/org/apache/lucene/util/packed/TestPackedInts.java	(revision 926738)
+++ src/test/org/apache/lucene/util/packed/TestPackedInts.java	(working copy)
@@ -71,16 +71,28 @@
         w.add(values[i]);
       }
       w.finish();
+      final long fp = out.getFilePointer();
       out.close();
 
       IndexInput in = d.openInput("out.bin");
       PackedInts.Reader r = PackedInts.getReader(in);
+      assertEquals(fp, in.getFilePointer());
       for(int i=0;i<values.length;i++) {
        assertEquals(values[i], r.get(i));
      }
Index: src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java	(revision 926738)
+++ src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java	(working copy)
   private final Comparator<BytesRef> termComp;
 
+  private final String segment;
+  private final static int PAGED_BYTES_BITS = 15;
+
+  // all fields share this single logical byte[]
+  private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
 
   final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
 
   public SimpleStandardTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp)
@@ -80,6 +87,8 @@
 
     this.termComp = termComp;
 
+    this.segment = segment;
+
     IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, StandardCodec.TERMS_INDEX_EXTENSION));
 
     boolean success = false;
@@ -118,10 +127,14 @@
           System.out.println("  read field number=" + field);
         }
         final int numIndexTerms = in.readInt();
+        final long termsStart = in.readLong();
         final long indexStart = in.readLong();
+        final long packedIndexStart = in.readLong();
+        final long packedOffsetsStart = in.readLong();
+        assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
         if (numIndexTerms > 0) {
           final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart));
+          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
         }
       }
       success = true;
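For reference, each field's entry in the terms-index file now carries four file pointers instead of one. A minimal sketch of the per-field header layout the hunk above decodes; the wrapper class is illustrative, not part of the patch:

    import java.io.IOException;
    import org.apache.lucene.store.IndexInput;

    // Hypothetical holder mirroring the read order in the hunk above.
    class FieldIndexHeader {
      final int numIndexTerms;        // count of sampled index terms
      final long termsStart;          // base file pointer into the main terms dict
      final long indexStart;          // start of the concatenated index term bytes
      final long packedIndexStart;    // start of the packed terms-dict offsets
      final long packedOffsetsStart;  // start of the packed term-byte offsets

      FieldIndexHeader(IndexInput in) throws IOException {
        numIndexTerms = in.readInt();
        termsStart = in.readLong();
        indexStart = in.readLong();
        packedIndexStart = in.readLong();
        packedOffsetsStart = in.readLong();
        // same invariant the patch asserts: the packed
        // structures always follow the raw term bytes
        assert packedIndexStart >= indexStart;
      }
    }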
@@ -130,57 +143,15 @@
         in.close();
         this.in = null;
         if (success) {
-          trimByteBlock();
           indexLoaded = true;
         }
+        termBytes.finish();
       } else {
         this.in = in;
       }
     }
   }
 
-  /* Called when index is fully loaded.  We know we will use
-   * no more bytes in the final byte[], so trim it down to
-   * its actual usagee.  This substantially reduces memory
-   * usage of SegmentReader searching a tiny segment. */
-  private final void trimByteBlock() {
-    if (blockOffset == 0) {
-      // There were no fields in this segment:
-      if (blocks != null) {
-        blocks[blockUpto] = null;
-      }
-    } else {
-      byte[] last = new byte[blockOffset];
-      System.arraycopy(blocks[blockUpto], 0, last, 0, blockOffset);
-      blocks[blockUpto] = last;
-    }
-  }
-
-  // TODO: we can record precisely how many bytes are
-  // required during indexing, save that into file, and be
-  // precise when we allocate the blocks; we even don't need
-  // to use blocks anymore (though my still want to, to
-  // prevent allocation failure due to mem fragmentation on
-  // 32bit)
-
-  // Fixed size byte blocks, to hold all term bytes; these
-  // blocks are shared across fields
-  private byte[][] blocks;
-  int blockUpto;
-  int blockOffset;
-
-  private static final int BYTE_BLOCK_SHIFT = 15;
-  private static final int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
-  private static final int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
-
-  static {
-    // Make sure DW can't ever write a term whose length
-    // cannot be encoded with short (because we use short[]
-    // to hold the length of each term).
-    assert IndexWriter.MAX_TERM_LENGTH < Short.MAX_VALUE;
-    assert BYTE_BLOCK_SIZE >= IndexWriter.MAX_TERM_LENGTH;
-  }
-
   private final class FieldIndexReader extends FieldReader {
 
     final private FieldInfo fieldInfo;
@@ -190,14 +161,21 @@
 
     private final IndexInput in;
 
     private final long indexStart;
+    private final long termsStart;
+    private final long packedIndexStart;
+    private final long packedOffsetsStart;
 
     private final int numIndexTerms;
 
-    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart) throws IOException {
+    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
+                            long packedOffsetsStart) throws IOException {
 
       this.fieldInfo = fieldInfo;
       this.in = in;
+      this.termsStart = termsStart;
       this.indexStart = indexStart;
+      this.packedIndexStart = packedIndexStart;
+      this.packedOffsetsStart = packedOffsetsStart;
       this.numIndexTerms = numIndexTerms;
 
       // We still create the indexReader when indexDivisor
@@ -210,6 +188,9 @@
         }
 
         coreIndex = new CoreFieldIndex(indexStart,
+                                       termsStart,
+                                       packedIndexStart,
+                                       packedOffsetsStart,
                                        numIndexTerms);
 
       } else {
@@ -221,7 +202,7 @@
 
     public void loadTermsIndex() throws IOException {
       if (coreIndex == null) {
-        coreIndex = new CoreFieldIndex(indexStart, numIndexTerms);
+        coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
       }
     }
 
@@ -263,150 +244,115 @@
 
     private final class CoreFieldIndex {
 
-      // TODO: used packed ints here
-      // Pointer into terms dict file that we are indexing
-      final long[] fileOffset;
+      final private long termBytesStart;
 
-      // TODO: used packed ints here
-      // For each term, points to start of term's bytes within
-      // block.
-      // TODO: wasteful that this is always long; many terms
-      // dict indexes obviously don't require so much address
-      // space; since we know up front during indexing how
-      // much space is needed we could pack this to the
-      // precise # bits
-      final long[] blockPointer;
+      // offset into index termBytes
+      final PackedInts.Reader termOffsets;
 
-      // TODO: used packed ints here: we know max term
-      // length; often its small
+      // index pointers into main terms dict
+      final PackedInts.Reader termsDictOffsets;
 
-      // TODO: can we inline this w/ the bytes?  like
-      // DW.  vast majority of terms only need 1 byte, not 2
-
-      // Length of each term
-      final short[] termLength;
-
       final int numIndexTerms;
 
-      CoreFieldIndex(long indexStart, int numIndexTerms) throws IOException {
+      final long termsStart;
 
+      public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
+
+        this.termsStart = termsStart;
+        termBytesStart = termBytes.getPointer();
+
         IndexInput clone = (IndexInput) in.clone();
         clone.seek(indexStart);
 
-        if (indexDivisor == -1) {
-          // Special case: we are being loaded inside
-          // IndexWriter because a SegmentReader that at
-          // first was opened for merging, is now being
-          // opened to perform deletes or for an NRT reader
-          this.numIndexTerms = numIndexTerms;
-        } else {
-          this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
-        }
+        // -1 is passed to mean "don't load term index", but
+        // if we are then later loaded it's overwritten with
+        // a real value
+        assert indexDivisor > 0;
+
+        this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
+        assert this.numIndexTerms > 0: "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor;
 
-        if (blocks == null) {
-          blocks = new byte[1][];
-          blocks[0] = new byte[BYTE_BLOCK_SIZE];
-        }
+        if (indexDivisor == 1) {
+          // Default (load all index terms) is fast -- slurp in the images from disk:
 
-        byte[] lastBlock = blocks[blockUpto];
-        int lastBlockOffset = blockOffset;
+          try {
+            final long numTermBytes = packedIndexStart - indexStart;
+            termBytes.copy(clone, numTermBytes);
 
-        fileOffset = new long[this.numIndexTerms];
-        blockPointer = new long[this.numIndexTerms];
-        termLength = new short[this.numIndexTerms];
+            // records offsets into main terms dict file
+            termsDictOffsets = PackedInts.getReader(clone);
+            assert termsDictOffsets.size() == numIndexTerms;
 
-        final byte[] skipBytes;
-        if (indexDivisor != 1) {
-          // only need skipBytes (below) if we are not
-          // loading all index terms
-          skipBytes = new byte[128];
+            // records offsets into byte[] term data
+            termOffsets = PackedInts.getReader(clone);
+            assert termOffsets.size() == 1+numIndexTerms;
+          } finally {
+            clone.close();
+          }
         } else {
-          skipBytes = null;
-        }
+          // Get packed iterators
+          final IndexInput clone1 = (IndexInput) in.clone();
+          final IndexInput clone2 = (IndexInput) in.clone();
 
-        int upto = 0;
-        long pointer = 0;
+          // Subsample the index terms
+          clone1.seek(packedIndexStart);
+          final PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.getReaderIterator(clone1);
+
+          clone2.seek(packedOffsetsStart);
+          final PackedInts.ReaderIterator termOffsetsIter = PackedInts.getReaderIterator(clone2);
 
-        for(int i=0;i<numIndexTerms;i++) {
-          final int start = clone.readVInt();
-          final int suffix = clone.readVInt();
-          final int thisTermLength = start + suffix;
-
-          if (blockOffset + thisTermLength > BYTE_BLOCK_SIZE) {
-            // New block
-            final byte[] newBlock = new byte[BYTE_BLOCK_SIZE];
-            if (blocks.length == blockUpto+1) {
-              final int newSize = ArrayUtil.oversize(blockUpto+2, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
-              final byte[][] newBlocks = new byte[newSize][];
-              System.arraycopy(blocks, 0, newBlocks, 0, blocks.length);
-              blocks = newBlocks;
-            }
-            blockUpto++;
-            blocks[blockUpto] = newBlock;
-            blockOffset = 0;
-          }
+          // TODO: often we can get by w/ fewer bits per
+          // value, below... but this'd be more complex:
+          // we'd have to try @ fewer bits and then grow
+          // if we overflowed it.
 
-          final byte[] block = blocks[blockUpto];
+          PackedInts.Mutable termsDictOffsetsM = PackedInts.getMutable(this.numIndexTerms, termsDictOffsetsIter.getBitsPerValue());
+          PackedInts.Mutable termOffsetsM = PackedInts.getMutable(this.numIndexTerms+1, termOffsetsIter.getBitsPerValue());
 
-          // Copy old prefix
-          assert lastBlock != null || start == 0;
-          assert block != null;
-          System.arraycopy(lastBlock, lastBlockOffset, block, blockOffset, start);
+          termsDictOffsets = termsDictOffsetsM;
+          termOffsets = termOffsetsM;
 
-          // Read new suffix
-          clone.readBytes(block, blockOffset+start, suffix);
+          int upto = 0;
 
-          // Advance file offset
-          pointer += clone.readVLong();
+          long lastTermOffset = 0;
+          long termOffsetUpto = 0;
 
-          assert thisTermLength < Short.MAX_VALUE;
+          while(upto < this.numIndexTerms) {
+            // main file offset copies straight over
+            termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
 
-          termLength[upto] = (short) thisTermLength;
-          fileOffset[upto] = pointer;
-          blockPointer[upto] = blockUpto * BYTE_BLOCK_SIZE + blockOffset;
+            termOffsetsM.set(upto, termOffsetUpto);
+            upto++;
 
-          /*
-          BytesRef tr = new BytesRef();
-          tr.bytes = blocks[blockUpto];
-          tr.offset = blockOffset;
-          tr.length = thisTermLength;
+            long termOffset = termOffsetsIter.next();
+            long nextTermOffset = termOffsetsIter.next();
+            final int numTermBytes = (int) (nextTermOffset - termOffset);
 
-          //System.out.println("  read index term=" + new String(blocks[blockUpto], blockOffset, thisTermLength, "UTF-8") + " this=" + this + " bytes=" + block + " (vs=" + blocks[blockUpto] + ") offset=" + blockOffset);
-          //System.out.println("  read index term=" + tr.toBytesString() + " this=" + this + " bytes=" + block + " (vs=" + blocks[blockUpto] + ") offset=" + blockOffset);
-          */
+            clone.seek(indexStart + termOffset);
+            assert indexStart + termOffset < clone.length() : "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.length();
+            assert indexStart + termOffset + numTermBytes < clone.length();
 
-          lastBlock = block;
-          lastBlockOffset = blockOffset;
-          blockOffset += thisTermLength;
-          upto++;
-        } else {
-          // Skip bytes
-          int toSkip = suffix;
-          while(true) {
-            if (toSkip > skipBytes.length) {
-              clone.readBytes(skipBytes, 0, skipBytes.length);
-              toSkip -= skipBytes.length;
-            } else {
-              clone.readBytes(skipBytes, 0, toSkip);
-              break;
+            termBytes.copy(clone, numTermBytes);
+            termOffsetUpto += numTermBytes;
+
+            // skip terms:
+            termsDictOffsetsIter.next();
+            for(int i=0;i<indexDivisor-2;i++) {
+              termOffsetsIter.next();
+              termsDictOffsetsIter.next();
             }
           }
-        }
-      }
+          termOffsetsM.set(upto, termOffsetUpto);
+
+          clone1.close();
+          clone2.close();
+          clone.close();
+        }
+      }
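A note on the arithmetic above: 1+(numIndexTerms-1)/indexDivisor is ceiling division, keeping the first term of every indexDivisor-sized group. A quick standalone check (the demo class name is made up):

    // Sketch: the subsample-count arithmetic used by CoreFieldIndex.
    public class SubsampleDemo {
      static int subsampled(int numIndexTerms, int indexDivisor) {
        return 1 + (numIndexTerms - 1) / indexDivisor;
      }

      public static void main(String[] args) {
        // 10 on-disk index terms, keep every 4th (slots 0, 4, 8) -> 3 kept
        System.out.println(subsampled(10, 4));   // prints 3
        // divisor 1 keeps everything
        System.out.println(subsampled(10, 1));   // prints 10
      }
    }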
 
-          final long loc = blockPointer[idx];
-          result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
-          result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
-          result.term.length = termLength[idx];
+          final long offset = termOffsets.get(idx);
+          final int length = (int) (termOffsets.get(1+idx) - offset);
+          termBytes.fill(result.term, termBytesStart + offset, length);
 
           result.position = idx * totalIndexInterval;
-          result.offset = fileOffset[idx];
+          result.offset = termsStart + termsDictOffsets.get(idx);
         }
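Term lengths are no longer stored explicitly: termOffsets holds numIndexTerms+1 offsets and a length falls out as the difference of adjacent entries, as the lookup above shows. A tiny illustration of that convention with a plain long[]:

    // Sketch: length-from-adjacent-offsets, the convention behind termOffsets.
    public class OffsetsDemo {
      public static void main(String[] args) {
        // Concatenated terms "a", "bb", "ccc" stored back to back:
        long[] offsets = {0, 1, 3, 6};   // numTerms+1 entries
        for (int i = 0; i < offsets.length - 1; i++) {
          int length = (int) (offsets[i + 1] - offsets[i]);
          System.out.println("term " + i + " starts at " + offsets[i] + ", length " + length);
        }
      }
    }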
 
        public final void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
 
          if (Codec.DEBUG) {
-           System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term + " indexLen = " + blockPointer.length + " numIndexTerms=" + fileOffset.length + " numIndexedTerms=" + fileOffset.length);
+           System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term.utf8ToString());
          }
 
          int lo = 0;					  // binary search
-         int hi = fileOffset.length - 1;
+         int hi = numIndexTerms - 1;
 
          while (hi >= lo) {
            int mid = (lo + hi) >>> 1;
-           final long loc = blockPointer[mid];
-           result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
-           result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
-           result.term.length = termLength[mid];
+           final long offset = termOffsets.get(mid);
+           final int length = (int) (termOffsets.get(1+mid) - offset);
+           termBytes.fill(result.term, termBytesStart + offset, length);
 
            int delta = termComp.compare(term, result.term);
            if (delta < 0) {
@@ -456,7 +400,7 @@
            } else {
              assert mid >= 0;
              result.position = mid*totalIndexInterval;
-             result.offset = fileOffset[mid];
+             result.offset = termsStart + termsDictOffsets.get(mid);
              return;
            }
          }
@@ -465,13 +409,12 @@
            hi = 0;
          }
 
-         final long loc = blockPointer[hi];
-         result.term.bytes = blocks[(int) (loc >> BYTE_BLOCK_SHIFT)];
-         result.term.offset = (int) (loc & BYTE_BLOCK_MASK);
-         result.term.length = termLength[hi];
+         final long offset = termOffsets.get(hi);
+         final int length = (int) (termOffsets.get(1+hi) - offset);
+         termBytes.fill(result.term, termBytesStart + offset, length);
 
          result.position = hi*totalIndexInterval;
-         result.offset = fileOffset[hi];
+         result.offset = termsStart + termsDictOffsets.get(hi);
        }
 
        public final void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
@@ -488,6 +431,7 @@
 
     if (!indexLoaded) {
 
       this.indexDivisor = indexDivisor;
+      this.totalIndexInterval = indexInterval * indexDivisor;
 
       // mxx
       if (Codec.DEBUG) {
@@ -498,10 +442,10 @@
       while(it.hasNext()) {
         it.next().loadTermsIndex();
       }
 
-      trimByteBlock();
       indexLoaded = true;
       in.close();
+      termBytes.finish();
     }
   }
Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java	(revision 926738)
+++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsIndexWriter.java	(working copy)
@@ -29,6 +29,7 @@
 
   public abstract class FieldWriter {
     public abstract boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException;
+    public abstract void finish() throws IOException;
   }
 
   public abstract FieldWriter addField(FieldInfo fieldInfo);
Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java	(revision 926738)
+++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java	(working copy)
@@ -372,7 +372,7 @@
         indexReader.getIndexOffset(term, indexResult);
 
         if (Codec.DEBUG) {
-          Codec.debug("  index pos=" + indexResult.position + " termFP=" + indexResult.offset + " term=" + indexResult.term + " this=" + this);
+          Codec.debug("  index pos=" + indexResult.position + " termFP=" + indexResult.offset + " term=" + indexResult.term.utf8ToString() + " this=" + this);
         }
 
         in.seek(indexResult.offset);
@@ -507,6 +507,9 @@
         }
 
         if (state.ord >= numTerms-1) {
+          if (Codec.DEBUG) {
+            Codec.debug("  return null ord=" + state.ord + " vs numTerms-1=" + (numTerms-1));
+          }
           return null;
         }
 
@@ -514,7 +517,7 @@
         state.docFreq = in.readVInt();
 
         if (Codec.DEBUG) {
-          Codec.debug("  text=" + bytesReader.term + " freq=" + state.docFreq + " tis=" + in);
+          Codec.debug("  text=" + bytesReader.term.utf8ToString() + " freq=" + state.docFreq + " tis=" + in);
         }
 
         // TODO: would be cleaner, but space-wasting, to
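The seek path above binary-searches the subsampled index, then the terms dict is scanned linearly from the returned offset. A self-contained sketch of that two-phase lookup, with plain arrays standing in for the packed readers and PagedBytes:

    // Sketch: binary-search the sparse index for the largest indexed term
    // <= target; the real code then scans the terms dict from that slot.
    public class IndexSearchDemo {
      public static void main(String[] args) {
        String[] indexTerms = {"apple", "grape", "melon", "peach"};  // every Nth term
        String target = "lemon";

        int lo = 0, hi = indexTerms.length - 1;
        while (hi >= lo) {
          int mid = (lo + hi) >>> 1;
          int delta = target.compareTo(indexTerms[mid]);
          if (delta < 0) {
            hi = mid - 1;
          } else if (delta > 0) {
            lo = mid + 1;
          } else {
            hi = mid;        // exact hit on an indexed term
            break;
          }
        }
        if (hi < 0) {
          hi = 0;            // same clamp the patch applies before filling the result
        }
        // a linear scan of the main terms dict would start at slot hi
        System.out.println("start scan at index slot " + hi + " (" + indexTerms[hi] + ")");
      }
    }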
Index: src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java	(revision 926738)
+++ src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java	(working copy)
@@ -25,6 +25,8 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.packed.PackedInts;
 
 import java.util.List;
 import java.util.ArrayList;
@@ -58,7 +60,6 @@
     // Placeholder for dir offset
     out.writeLong(0);
     out.writeInt(termIndexInterval);
-    termWriter = new DeltaBytesWriter(out);
   }
 
   @Override
@@ -66,8 +67,6 @@
     this.termsOut = termsOut;
   }
 
-  final private DeltaBytesWriter termWriter;
-
   @Override
   public FieldWriter addField(FieldInfo field) {
     SimpleFieldWriter writer = new SimpleFieldWriter(field);
@@ -78,33 +77,99 @@
   private class SimpleFieldWriter extends FieldWriter {
     final FieldInfo fieldInfo;
     int numIndexTerms;
-    private long lastTermsPointer;
     final long indexStart;
+    final long termsStart;
+    long packedIndexStart;
+    long packedOffsetsStart;
     private int numTerms;
 
+    // TODO: we could conceivably make a PackedInts wrapper
+    // that auto-grows... then we wouldn't force 6 bytes RAM
+    // per index term:
+    private short[] termLengths;
+    private int[] termsPointerDeltas;
+    private long lastTermsPointer;
+    private long totTermLength;
+
     SimpleFieldWriter(FieldInfo fieldInfo) {
       this.fieldInfo = fieldInfo;
       indexStart = out.getFilePointer();
-      termWriter.reset();
+      termsStart = lastTermsPointer = termsOut.getFilePointer();
+      termLengths = new short[0];
+      termsPointerDeltas = new int[0];
     }
 
     @Override
    public boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException {
      // First term is first indexed term:
      if (0 == (numTerms++ % termIndexInterval)) {
-        final long termsPointer = termsOut.getFilePointer();
+
        if (Codec.DEBUG) {
-          Codec.debug("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + text + " termsFP=" + termsPointer + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
+          Codec.debug("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + text.utf8ToString() + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
        }
-        termWriter.write(text);
-        out.writeVLong(termsPointer - lastTermsPointer);
-        lastTermsPointer = termsPointer;
+
+        // write full bytes
+        out.writeBytes(text.bytes, text.offset, text.length);
+
+        if (termLengths.length == numIndexTerms) {
+          termLengths = ArrayUtil.grow(termLengths);
+        }
+        if (termsPointerDeltas.length == numIndexTerms) {
+          termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
+        }
+
+        // save delta terms pointer
+        final long fp = termsOut.getFilePointer();
+        termsPointerDeltas[numIndexTerms] = (int) (fp - lastTermsPointer);
+        lastTermsPointer = fp;
+
+        // save term length (in bytes)
+        assert text.length <= Short.MAX_VALUE;
+        termLengths[numIndexTerms] = (short) text.length;
+
+        totTermLength += text.length;
+
        numIndexTerms++;
        return true;
      } else {
        return false;
      }
    }
+
+    @Override
+    public void finish() throws IOException {
+
+      // write primary terms dict offsets
+      packedIndexStart = out.getFilePointer();
+
+      final long maxValue = termsOut.getFilePointer();
+      PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(maxValue));
+
+      // relative to our indexStart
+      long upto = 0;
+      for(int i=0;i<numIndexTerms;i++) {
+        upto += termsPointerDeltas[i];
+        w.add(upto);
+      }
+      w.finish();
+
+      // write offsets into the terms' bytes
+      packedOffsetsStart = out.getFilePointer();
+
+      w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength));
+      upto = 0;
+      for(int i=0;i<numIndexTerms;i++) {
+        w.add(upto);
+        upto += termLengths[i];
+      }
+      w.add(upto);
+      w.finish();
+
+      // don't tie up this RAM while other fields are written:
+      termLengths = null;
+      termsPointerDeltas = null;
+    }
   }
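finish() sizes each packed stream with PackedInts.bitsRequired(maxValue), where maxValue bounds every value written. A hedged round-trip sketch using the same PackedInts API the patch relies on; the RAMDirectory and file name are just demo stand-ins for the real terms-index file:

    import org.apache.lucene.store.*;
    import org.apache.lucene.util.packed.PackedInts;

    // Sketch: write-then-read round trip with the PackedInts API above.
    public class PackedRoundTrip {
      public static void main(String[] args) throws Exception {
        long[] values = {0, 17, 300, 1023};
        long maxValue = 1023;                 // known bound on every value

        Directory dir = new RAMDirectory();
        IndexOutput out = dir.createOutput("demo.bin");
        PackedInts.Writer w = PackedInts.getWriter(out, values.length,
                                                   PackedInts.bitsRequired(maxValue)); // 10 bits
        for (long v : values) {
          w.add(v);                           // must add exactly valueCount values
        }
        w.finish();
        out.close();

        IndexInput in = dir.openInput("demo.bin");
        PackedInts.Reader r = PackedInts.getReader(in);
        for (int i = 0; i < r.size(); i++) {
          System.out.println(i + " -> " + r.get(i) + " (expected " + values[i] + ")");
        }
        in.close();
      }
    }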
Index: src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java	(revision 0)
+++ src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java	(revision 0)
+package org.apache.lucene.index.codecs.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CloseableThreadLocal;
+
+/** Represents a logical byte[] as a series of blocks;
+ *  bytes are appended via {@link #copy} and later
+ *  retrieved as slices via {@link #fill}. */
+final class PagedBytes {
+  private final ArrayList<byte[]> blocks = new ArrayList<byte[]>();
+  private final int blockSize;
+  private final int blockBits;
+  private final int blockMask;
+  private int upto;
+  private byte[] currentBlock;
+  private final CloseableThreadLocal<byte[]> threadBuffers = new CloseableThreadLocal<byte[]>();
+
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
+  /** 1<<blockBits must be bigger than the biggest single
+   *  BytesRef slice that will be pulled */
+  public PagedBytes(int blockBits) {
+    this.blockSize = 1 << blockBits;
+    this.blockBits = blockBits;
+    blockMask = blockSize-1;
+    upto = blockSize;
+  }
+
+  /** Read this many bytes from in */
+  public void copy(IndexInput in, long byteCount) throws IOException {
+    while (byteCount > 0) {
+      int left = blockSize - upto;
+      if (left == 0) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+        left = blockSize;
+      }
+      if (left < byteCount) {
+        in.readBytes(currentBlock, upto, left, false);
+        upto = blockSize;
+        byteCount -= left;
+      } else {
+        in.readBytes(currentBlock, upto, (int) byteCount, false);
+        upto += byteCount;
+        byteCount = 0;
+      }
+    }
+  }
+
+  /** Commits final byte[], trimming it if necessary. */
+  public void finish() {
+    if (upto < blockSize) {
+      final byte[] newBlock = new byte[upto];
+      System.arraycopy(currentBlock, 0, newBlock, 0, upto);
+      currentBlock = newBlock;
+    }
+    if (currentBlock == null) {
+      currentBlock = EMPTY_BYTES;
+    }
+    blocks.add(currentBlock);
+    currentBlock = null;
+  }
+
+  public long getPointer() {
+    if (currentBlock == null) {
+      return 0;
+    } else {
+      return (blocks.size() * ((long) blockSize)) + upto;
+    }
+  }
+
+  /** Get a slice out of the byte array. */
+  public void fill(BytesRef b, long start, int length) {
+    assert length >= 0: "length=" + length;
+    final int index = (int) (start >> blockBits);
+    final int offset = (int) (start & blockMask);
+    b.length = length;
+    if (blockSize - offset >= length) {
+      // Within block
+      b.bytes = blocks.get(index);
+      b.offset = offset;
+    } else {
+      // Split
+      byte[] buffer = threadBuffers.get();
+      if (buffer == null) {
+        buffer = new byte[length];
+        threadBuffers.set(buffer);
+      } else if (buffer.length < length) {
+        buffer = ArrayUtil.grow(buffer, length);
+        threadBuffers.set(buffer);
+      }
+      b.bytes = buffer;
+      b.offset = 0;
+      System.arraycopy(blocks.get(index), offset, buffer, 0, blockSize-offset);
+      System.arraycopy(blocks.get(1+index), 0, buffer, blockSize-offset, length-(blockSize-offset));
+    }
+  }
+
+  public void close() {
+    threadBuffers.close();
+  }
+}

Property changes on: src/java/org/apache/lucene/index/codecs/standard/PagedBytes.java
___________________________________________________________________
Added: svn:eol-style
   + native
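A hedged sketch of the intended PagedBytes lifecycle (mark, copy, finish, fill); it assumes PagedBytes is visible to the calling code, and the tiny block size is chosen only to force the split-slice path:

    import org.apache.lucene.store.*;
    import org.apache.lucene.util.BytesRef;

    // Sketch: one logical byte[] shared append-only, sliced on read.
    public class PagedBytesDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexOutput out = dir.createOutput("bytes.bin");
        byte[] data = "0123456789abcdefghij".getBytes("UTF-8");  // 20 bytes
        out.writeBytes(data, 0, data.length);
        out.close();

        PagedBytes pb = new PagedBytes(4);     // 16-byte blocks, just for the demo
        IndexInput in = dir.openInput("bytes.bin");
        long start = pb.getPointer();          // where this field's bytes begin (0 here)
        pb.copy(in, data.length);              // append-only bulk copy
        pb.finish();                           // trim last block; no more writes
        in.close();

        BytesRef slice = new BytesRef();
        pb.fill(slice, start + 10, 10);        // bytes 10..19 straddle two blocks, so
                                               // fill copies into a thread-private buffer
        System.out.println(slice.utf8ToString());  // prints "abcdefghij"
        pb.close();
      }
    }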
Index: src/java/org/apache/lucene/util/packed/Packed64.java
===================================================================
--- src/java/org/apache/lucene/util/packed/Packed64.java	(revision 926738)
+++ src/java/org/apache/lucene/util/packed/Packed64.java	(working copy)
@@ -148,6 +148,7 @@
     super(valueCount, bitsPerValue);
     int size = size(valueCount, bitsPerValue);
     blocks = new long[size+1]; // +1 due to non-conditional tricks
+    // TODO: find a faster way to bulk-read longs...
     for(int i=0;i<size;i++) {
       blocks[i] = in.readLong();
     }
Index: src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
===================================================================
--- src/java/org/apache/lucene/util/packed/PackedReaderIterator.java	(revision 0)
+++ src/java/org/apache/lucene/util/packed/PackedReaderIterator.java	(revision 0)
+package org.apache.lucene.util.packed;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexInput;
+
+final class PackedReaderIterator implements PackedInts.ReaderIterator {
+  private long pending;
+  private int pendingBitsLeft;
+  private final IndexInput in;
+  private final int bitsPerValue;
+  private final int valueCount;
+  private final long[] masks;
+
+  PackedReaderIterator(int bitsPerValue, int valueCount, IndexInput in) throws IOException {
+    this.bitsPerValue = bitsPerValue;
+    this.valueCount = valueCount;
+    this.in = in;
+
+    // masks[k-1] has the low k bits set
+    masks = new long[bitsPerValue];
+    long v = 1;
+    for (int i = 0; i < bitsPerValue; i++) {
+      v *= 2;
+      masks[i] = v - 1;
+    }
+  }
+
+  public int getBitsPerValue() {
+    return bitsPerValue;
+  }
+
+  public int size() {
+    return valueCount;
+  }
+
+  public long next() throws IOException {
+    if (pendingBitsLeft == 0) {
+      pending = in.readLong();
+      pendingBitsLeft = 64;
+    }
+    if (pendingBitsLeft >= bitsPerValue) {
+      // not split
+      final long result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
+      pendingBitsLeft -= bitsPerValue;
+      return result;
+    } else {
+      // split
+      final int bits1 = bitsPerValue - pendingBitsLeft;
+      final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
+      pending = in.readLong();
+      final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
+      pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
+      return result1 | result2;
+    }
+  }
+
+  public void close() throws IOException {
+    in.close();
+  }
+}

Property changes on: src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: src/java/org/apache/lucene/util/packed/PackedInts.java
===================================================================
--- src/java/org/apache/lucene/util/packed/PackedInts.java	(revision 926738)
+++ src/java/org/apache/lucene/util/packed/PackedInts.java	(working copy)
@@ -17,6 +17,8 @@
  * limitations under the License.
  */
 
+import java.io.Closeable;
+
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.CodecUtil;
@@ -65,6 +67,18 @@
   }
 
   /**
+   * Run-once iterator interface, to decode previously saved PackedInts.
+   */
+  public static interface ReaderIterator extends Closeable {
+    /** Returns next value */
+    long next() throws IOException;
+    /** Returns number of bits per value */
+    int getBitsPerValue();
+    /** Returns number of values */
+    int size();
+  }
+
+  /**
    * A packed integer array that can be modified.
    * @lucene.internal
    */
@@ -167,6 +181,22 @@
   }
 
   /**
+   * Retrieve PackedInts as a {@link ReaderIterator}
+   * @param in positioned at the beginning of a stored packed int structure.
+   * @return an iterator to access the values
+   * @throws IOException if the structure could not be retrieved.
+   * @lucene.internal
+   */
+  public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
+    CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START);
+    final int bitsPerValue = in.readVInt();
+    assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
+    final int valueCount = in.readVInt();
+
+    return new PackedReaderIterator(bitsPerValue, valueCount, in);
+  }
+
+  /**
    * Create a packed integer array with the given amount of values initialized
    * to 0. the valueCount and the bitsPerValue cannot be changed after creation.
    * All Mutables known by this factory are kept fully in RAM.
@@ -228,7 +258,7 @@
     }
     if (maxValue > 0x1FFFFFFFFFFFFFFFL) {
       return 62;
     }
-    return (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0));
+    return Math.max(1, (int) Math.ceil(Math.log(1+maxValue)/Math.log(2.0)));
   }
 
   /**
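getReaderIterator lets callers stream a packed structure one value at a time instead of materializing a Reader, which is how the terms-index subsampling path above consumes its two streams. A hedged usage sketch with the same API (demo names are made up):

    import org.apache.lucene.store.*;
    import org.apache.lucene.util.packed.PackedInts;

    // Sketch: stream previously written packed ints in write order.
    public class IteratorDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexOutput out = dir.createOutput("packed.bin");
        PackedInts.Writer w = PackedInts.getWriter(out, 5, PackedInts.bitsRequired(99));
        for (long v : new long[] {3, 14, 15, 92, 65}) {
          w.add(v);
        }
        w.finish();
        out.close();

        IndexInput in = dir.openInput("packed.bin");
        PackedInts.ReaderIterator it = PackedInts.getReaderIterator(in);
        for (int i = 0; i < it.size(); i++) {
          System.out.println(it.next());   // 3, 14, 15, 92, 65
        }
        it.close();                        // also closes the underlying IndexInput
      }
    }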
Index: src/java/org/apache/lucene/util/packed/Packed32.java
===================================================================
--- src/java/org/apache/lucene/util/packed/Packed32.java	(revision 926738)
+++ src/java/org/apache/lucene/util/packed/Packed32.java	(working copy)
@@ -129,6 +129,7 @@
     super(valueCount, bitsPerValue);
     int size = size(bitsPerValue, valueCount);
     blocks = new int[size + 1]; // +1 due to non-conditional tricks
+    // TODO: find a faster way to bulk-read ints...
     for(int i = 0 ; i < size ; i++) {
       blocks[i] = in.readInt();
     }
Index: src/java/org/apache/lucene/util/ArrayUtil.java
===================================================================
--- src/java/org/apache/lucene/util/ArrayUtil.java	(revision 926738)
+++ src/java/org/apache/lucene/util/ArrayUtil.java	(working copy)
@@ -232,6 +232,29 @@
     return currentSize;
   }
 
+  public static short[] grow(short[] array, int minSize) {
+    if (array.length < minSize) {
+      short[] newArray = new short[oversize(minSize, RamUsageEstimator.NUM_BYTES_SHORT)];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else
+      return array;
+  }
+
+  public static short[] grow(short[] array) {
+    return grow(array, 1 + array.length);
+  }
+
+  public static short[] shrink(short[] array, int targetSize) {
+    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT);
+    if (newSize != array.length) {
+      short[] newArray = new short[newSize];
+      System.arraycopy(array, 0, newArray, 0, newSize);
+      return newArray;
+    } else
+      return array;
+  }
+
   public static int[] grow(int[] array, int minSize) {
     if (array.length < minSize) {
       int[] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_INT)];
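The new short[] overloads mirror the existing int[] ones: grow() oversizes past the requested minimum so repeated appends stay amortized O(1), which is what SimpleFieldWriter leans on for termLengths. A minimal usage sketch (demo class name is made up):

    import org.apache.lucene.util.ArrayUtil;

    // Sketch: amortized append using the new short[] grow overload.
    public class GrowDemo {
      public static void main(String[] args) {
        short[] lengths = new short[0];
        int count = 0;
        for (short len : new short[] {5, 9, 3, 12}) {
          if (lengths.length == count) {
            lengths = ArrayUtil.grow(lengths);  // oversizes beyond count+1
          }
          lengths[count++] = len;
        }
        System.out.println(count + " values, capacity " + lengths.length);
      }
    }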