Index: lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java	(revision 1482419)
+++ lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java	(working copy)
@@ -46,6 +46,7 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
@@ -54,12 +55,12 @@
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.CheckIndex.Status.FieldNormStatus;
 import org.apache.lucene.index.CheckIndex.Status.StoredFieldStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermIndexStatus;
 import org.apache.lucene.index.CheckIndex.Status.TermVectorStatus;
+import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
@@ -745,6 +746,19 @@
     }
   }
 
+  public static String getDocValuesFormat(String field) {
+    return getDocValuesFormat(Codec.getDefault(), field);
+  }
+
+  public static String getDocValuesFormat(Codec codec, String field) {
+    DocValuesFormat f = codec.docValuesFormat();
+    if (f instanceof PerFieldDocValuesFormat) {
+      return ((PerFieldDocValuesFormat) f).getDocValuesFormatForField(field).getName();
+    } else {
+      return f.getName();
+    }
+  }
+
   public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
     String[] files = dir.listAll();
     if (files.length > 1 || (files.length == 1 && !files[0].equals("write.lock"))) {
Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java	(revision 1482419)
+++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java	(working copy)
@@ -35,6 +35,8 @@
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.PackedInts;
 
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
+
 class Lucene40DocValuesWriter extends DocValuesConsumer {
   private final Directory dir;
   private final SegmentWriteState state;
@@ -156,6 +158,9 @@
     int minLength = Integer.MAX_VALUE;
     int maxLength = Integer.MIN_VALUE;
     for (BytesRef b : values) {
+      if (b.length > (BYTE_BLOCK_SIZE - 2)) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
+      }
       minLength = Math.min(minLength, b.length);
      maxLength = Math.max(maxLength, b.length);
       if (uniqueValues != null) {
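Note (illustrative, not part of the patch): a sketch of how a test could consult
the new _TestUtil.getDocValuesFormat helper to special-case formats that still
enforce the legacy 32 KB binary doc-values limit; "field" is a hypothetical
field name:

  Codec codec = Codec.getDefault();
  String dvFormat = _TestUtil.getDocValuesFormat(codec, "field");
  if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42")) {
    // these formats still reject binary values over 32766 bytes
  }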
Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java	(revision 1482419)
+++ lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java	(working copy)
@@ -2060,4 +2060,103 @@
     }
   }
 
+  // LUCENE-4853
+  public void testHugeBinaryValues() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random());
+    // FSDirectory because SimpleText will consume gobbs of
+    // space when storing big binary values:
+    Directory d = newFSDirectory(_TestUtil.getTempDir("hugeBinaryValues"));
+    boolean doFixed = random().nextBoolean();
+    int numDocs;
+    int fixedLength = 0;
+    if (doFixed) {
+      // Sometimes make all values fixed length since some
+      // codecs have different code paths for this:
+      numDocs = _TestUtil.nextInt(random(), 10, 20);
+      fixedLength = _TestUtil.nextInt(random(), 65537, 256*1024);
+    } else {
+      numDocs = _TestUtil.nextInt(random(), 100, 200);
+    }
+    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    List<byte[]> docBytes = new ArrayList<byte[]>();
+    long totalBytes = 0;
+    for(int docID=0;docID<numDocs;docID++) {
+      // Must be > 64KB in size to ensure more than 2 pages in
+      // PagedBytes would be needed:
+      int numBytes;
+      if (doFixed) {
+        numBytes = fixedLength;
+      } else if (docID == 0 || random().nextInt(5) == 3) {
+        numBytes = _TestUtil.nextInt(random(), 65537, 3*1024*1024);
+      } else {
+        numBytes = _TestUtil.nextInt(random(), 1, 1024*1024);
+      }
+      totalBytes += numBytes;
+      if (totalBytes > 5 * 1024*1024) {
+        break;
+      }
+      byte[] bytes = new byte[numBytes];
+      random().nextBytes(bytes);
+      docBytes.add(bytes);
+      Document doc = new Document();
+      BytesRef b = new BytesRef(bytes);
+      b.length = bytes.length;
+      doc.add(new BinaryDocValuesField("field", b));
+      doc.add(new StringField("id", ""+docID, Field.Store.YES));
+      try {
+        w.addDocument(doc);
+      } catch (IllegalArgumentException iae) {
+        if (iae.getMessage().indexOf("is too large") == -1) {
+          throw iae;
+        } else {
+          // OK: some codecs can't handle binary DV > 32K
+          assertFalse(codecAcceptsHugeBinaryValues());
+          w.rollback();
+          d.close();
+          return;
+        }
+      }
+    }
+
+    DirectoryReader r;
+    try {
+      r = w.getReader();
+    } catch (IllegalArgumentException iae) {
+      if (iae.getMessage().indexOf("is too large") == -1) {
+        throw iae;
+      } else {
+        assertFalse(codecAcceptsHugeBinaryValues());
+
+        // OK: some codecs can't handle binary DV > 32K
+        w.rollback();
+        d.close();
+        return;
+      }
+    }
+    w.close();
+
+    AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+
+    BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
+    for(int docID=0;docID<docBytes.size();docID++) {
+      Document doc = ar.document(docID);
+      BytesRef bytes = new BytesRef();
+      s.get(docID, bytes);
+      byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
+      assertEquals(expected.length, bytes.length);
+      assertEquals(new BytesRef(expected), bytes);
+    }
+
+    ar.close();
+    d.close();
+  }
+
+  protected boolean codecAcceptsHugeBinaryValues() {
+    return true;
+  }
 }
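Note (for context, not in the patch): this is what the change enables with the
default codec — a binary doc value well above the old 32 KB cap. The path,
analyzer, and field name are made up for the example:

  Directory dir = FSDirectory.open(new File("/tmp/bigdv"));
  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, new StandardAnalyzer(Version.LUCENE_43));
  IndexWriter writer = new IndexWriter(dir, iwc);
  byte[] big = new byte[256 * 1024];   // would formerly throw IllegalArgumentException
  Document doc = new Document();
  doc.add(new BinaryDocValuesField("blob", new BytesRef(big)));
  writer.addDocument(doc);
  writer.close();
  dir.close();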
Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java	(revision 1482419)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java	(working copy)
@@ -253,4 +253,53 @@
     }
   }
 
+  // Make sure that if we require > 32 KB for one
+  // document, we don't hit exc when using Facet42DocValuesFormat
+  public void testManyFacetsInOneDocument() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(new Facet42Codec());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    FacetFields facetFields = new FacetFields(taxoWriter);
+
+    int numLabels = _TestUtil.nextInt(random(), 40000, 100000);
+
+    Document doc = new Document();
+    doc.add(newTextField("field", "text", Field.Store.NO));
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(int i=0;i<numLabels;i++) {
+      paths.add(new CategoryPath("dim", "" + i));
+    }
+    facetFields.addFields(doc, paths);
+    writer.addDocument(doc);
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    // NRT open
+    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
+    FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
+    searcher.search(new MatchAllDocsQuery(), c);
+    List<FacetResult> results = c.getFacetResults();
+    assertEquals(1, results.size());
+    FacetResultNode root = results.get(0).getFacetResultNode();
+    assertEquals(numLabels, root.subResults.size());
+    Set<String> allLabels = new HashSet<String>();
+    for(FacetResultNode childNode : root.subResults) {
+      assertEquals(2, childNode.label.length);
+      allLabels.add(childNode.label.components[1]);
+      assertEquals(1, (int) childNode.value);
+    }
+    assertEquals(numLabels, allLabels.size());
+
+    IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
+  }
 }
Index: lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java	(revision 1482419)
+++ lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java	(working copy)
@@ -46,6 +46,7 @@
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util._TestUtil;
 
 /**
  * Basic tests of PerFieldDocValuesFormat
@@ -63,6 +64,13 @@
   protected Codec getCodec() {
     return codec;
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues() {
+    String dvFormat = _TestUtil.getDocValuesFormat("field");
+    // Asserting wraps Lucene42:
+    return !dvFormat.equals("Lucene40") && !dvFormat.equals("Lucene42") && !dvFormat.equals("Asserting");
+  }
 
   // just a simple trivial test
   // TODO: we should come up with a test that somehow checks that segment suffix
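Note (sketch, not part of the patch): the per-field test above resolves the
DocValuesFormat for each field; an application could pin a format per field the
same way by overriding the default codec. The format name returned here is
illustrative:

  IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
  iwc.setCodec(new Lucene42Codec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return DocValuesFormat.forName("Lucene42");
    }
  });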
Index: lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java	(revision 1482419)
+++ lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40DocValuesFormat.java	(working copy)
@@ -30,5 +30,11 @@
   protected Codec getCodec() {
     return codec;
   }
-  
+
+  // LUCENE-4583: This codec should throw IAE on huge binary values:
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues() {
+    return false;
+  }
+
 }
Index: lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java	(revision 1482419)
+++ lucene/core/src/test/org/apache/lucene/codecs/lucene42/TestLucene42DocValuesFormat.java	(working copy)
@@ -30,4 +30,9 @@
   protected Codec getCodec() {
     return codec;
   }
+
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues() {
+    return false;
+  }
 }
Index: lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java	(revision 1482419)
+++ lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java	(working copy)
@@ -326,31 +326,7 @@
     iwriter.close();
     directory.close();
   }
-  
-  public void testTooLargeBytes() throws IOException {
-    Analyzer analyzer = new MockAnalyzer(random());
-    Directory directory = newDirectory();
-    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
-    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
-    iwc.setMergePolicy(newLogMergePolicy());
-    IndexWriter iwriter = new IndexWriter(directory, iwc);
-    Document doc = new Document();
-    byte bytes[] = new byte[100000];
-    BytesRef b = new BytesRef(bytes);
-    random().nextBytes(bytes);
-    doc.add(new BinaryDocValuesField("dv", b));
-    try {
-      iwriter.addDocument(doc);
-      fail("did not get expected exception");
-    } catch (IllegalArgumentException expected) {
-      // expected
-    }
-    iwriter.close();
-    
-    directory.close();
-  }
-  
   public void testTooLargeSortedBytes() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());
Index: lucene/core/src/test/org/apache/lucene/index/TestDocValuesFormat.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestDocValuesFormat.java	(revision 1482419)
+++ lucene/core/src/test/org/apache/lucene/index/TestDocValuesFormat.java	(working copy)
@@ -28,4 +28,11 @@
   protected Codec getCodec() {
     return Codec.getDefault();
   }
+
+  // LUCENE-4583: This codec should throw IAE on huge binary values:
+  @Override
+  protected boolean codecAcceptsHugeBinaryValues() {
+    return false;
+  }
+
 }
Index: lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java	(revision 1482419)
+++ lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java	(working copy)
@@ -22,6 +22,7 @@
 
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -30,6 +31,9 @@
 
 public class TestPagedBytes extends LuceneTestCase {
 
+  // Writes random byte/s to "normal" file in dir, then
+  // copies into PagedBytes and verifies with
+  // PagedBytes.Reader:
   public void testDataInputOutput() throws Exception {
     Random random = random();
     for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
@@ -90,6 +94,60 @@
     }
   }
 
+  // Writes random byte/s into PagedBytes via
+  // .getDataOutput(), then verifies with
+  // PagedBytes.getDataInput():
+  public void testDataInputOutput2() throws Exception {
+    Random random = random();
+    for(int iter=0;iter<5*RANDOM_MULTIPLIER;iter++) {
+      final int blockBits = _TestUtil.nextInt(random, 1, 20);
+      final int blockSize = 1 << blockBits;
+      final PagedBytes p = new PagedBytes(blockBits);
+      final DataOutput out = p.getDataOutput();
+      final int numBytes = random().nextInt(10000000);
+
+      final byte[] answer = new byte[numBytes];
+      random().nextBytes(answer);
+      int written = 0;
+      while(written < numBytes) {
+        if (random().nextInt(10) == 7) {
+          out.writeByte(answer[written++]);
+        } else {
+          int chunk = Math.min(random().nextInt(1000), numBytes - written);
+          out.writeBytes(answer, written, chunk);
+          written += chunk;
+        }
+      }
+
+      final PagedBytes.Reader reader = p.freeze(random.nextBoolean());
+
+      final DataInput in = p.getDataInput();
+
+      final byte[] verify = new byte[numBytes];
+      int read = 0;
+      while(read < numBytes) {
+        if (random().nextInt(10) == 7) {
+          verify[read++] = in.readByte();
+        } else {
+          int chunk = Math.min(random().nextInt(1000), numBytes - read);
+          in.readBytes(verify, read, chunk);
+          read += chunk;
+        }
+      }
+      assertTrue(Arrays.equals(answer, verify));
+
+      final BytesRef slice = new BytesRef();
+      for(int iter2=0;iter2<100;iter2++) {
+        final int pos = random.nextInt(numBytes-1);
+        final int len = random.nextInt(Math.min(blockSize+1, numBytes - pos));
+        reader.fillSlice(slice, pos, len);
+        for(int byteUpto=0;byteUpto<len;byteUpto++) {
+          assertEquals(answer[pos + byteUpto], slice.bytes[slice.offset + byteUpto]);
+        }
+      }
+    }
+  }
 }
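Note (illustrative only): the round trip the new test exercises, in its simplest
form, assuming the PagedBytes changes below are applied; run inside a test
method that declares throws Exception:

  PagedBytes p = new PagedBytes(15);     // 32 KB blocks
  DataOutput out = p.getDataOutput();
  byte[] data = new byte[100000];
  random().nextBytes(data);
  out.writeBytes(data, 0, data.length);
  p.freeze(false);                       // writing must stop before reading
  DataInput in = p.getDataInput();
  byte[] copy = new byte[data.length];
  in.readBytes(copy, 0, copy.length);
  assertTrue(Arrays.equals(data, copy));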
Index: lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/PagedBytes.java	(revision 1482419)
+++ lucene/core/src/java/org/apache/lucene/util/PagedBytes.java	(working copy)
@@ -20,6 +20,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 
@@ -34,6 +36,7 @@
  **/
 public final class PagedBytes {
   private final List<byte[]> blocks = new ArrayList<byte[]>();
+  // TODO: these are unused?
   private final List<Integer> blockEnd = new ArrayList<Integer>();
   private final int blockSize;
   private final int blockBits;
@@ -42,6 +45,7 @@
   private boolean frozen;
   private int upto;
   private byte[] currentBlock;
+  private final long bytesUsedPerBlock;
 
   private static final byte[] EMPTY_BYTES = new byte[0];
 
@@ -132,6 +136,7 @@
     this.blockBits = blockBits;
     blockMask = blockSize-1;
     upto = blockSize;
+    bytesUsedPerBlock = blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF;
   }
 
   /** Read this many bytes from in */
@@ -216,6 +221,11 @@
     }
   }
 
+  /** Return approx RAM usage in bytes. */
+  public long ramBytesUsed() {
+    return (blocks.size() + (currentBlock != null ? 1 : 0)) * bytesUsedPerBlock;
+  }
+
   /** Copy bytes in, writing the length as a 1 or 2 byte
    *  vInt prefix. */
   // TODO: this really needs to be refactored into fieldcacheimpl!
@@ -249,4 +259,148 @@
     return pointer;
   }
 
+  public final class PagedBytesDataInput extends DataInput {
+    private int currentBlockIndex;
+    private int currentBlockUpto;
+    private byte[] currentBlock;
+
+    PagedBytesDataInput() {
+      currentBlock = blocks.get(0);
+    }
+
+    @Override
+    public PagedBytesDataInput clone() {
+      PagedBytesDataInput clone = getDataInput();
+      clone.setPosition(getPosition());
+      return clone;
+    }
+
+    /** Returns the current byte position. */
+    public long getPosition() {
+      return (long) currentBlockIndex * blockSize + currentBlockUpto;
+    }
+
+    /** Seek to a position previously obtained from
+     *  {@link #getPosition}. */
+    public void setPosition(long pos) {
+      currentBlockIndex = (int) (pos >> blockBits);
+      currentBlock = blocks.get(currentBlockIndex);
+      currentBlockUpto = (int) (pos & blockMask);
+    }
+
+    @Override
+    public byte readByte() {
+      if (currentBlockUpto == blockSize) {
+        nextBlock();
+      }
+      return currentBlock[currentBlockUpto++];
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) {
+      assert b.length >= offset + len;
+      final int offsetEnd = offset + len;
+      while (true) {
+        final int blockLeft = blockSize - currentBlockUpto;
+        final int left = offsetEnd - offset;
+        if (blockLeft < left) {
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           blockLeft);
+          nextBlock();
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(currentBlock, currentBlockUpto,
+                           b, offset,
+                           left);
+          currentBlockUpto += left;
+          break;
+        }
+      }
+    }
+
+    private void nextBlock() {
+      currentBlockIndex++;
+      currentBlockUpto = 0;
+      currentBlock = blocks.get(currentBlockIndex);
+    }
+  }
+
+  public final class PagedBytesDataOutput extends DataOutput {
+    @Override
+    public void writeByte(byte b) {
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+      currentBlock[upto++] = b;
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) {
+      assert b.length >= offset + length;
+      if (length == 0) {
+        return;
+      }
+
+      if (upto == blockSize) {
+        if (currentBlock != null) {
+          blocks.add(currentBlock);
+          blockEnd.add(upto);
+        }
+        currentBlock = new byte[blockSize];
+        upto = 0;
+      }
+
+      final int offsetEnd = offset + length;
+      while(true) {
+        final int left = offsetEnd - offset;
+        final int blockLeft = blockSize - upto;
+        if (blockLeft < left) {
+          System.arraycopy(b, offset, currentBlock, upto, blockLeft);
+          blocks.add(currentBlock);
+          blockEnd.add(blockSize);
+          currentBlock = new byte[blockSize];
+          upto = 0;
+          offset += blockLeft;
+        } else {
+          // Last block
+          System.arraycopy(b, offset, currentBlock, upto, left);
+          upto += left;
+          break;
+        }
+      }
+    }
+
+    /** Return the current byte position. */
+    public long getPosition() {
+      return getPointer();
+    }
+  }
+
+  /** Returns a DataInput to read values from this
+   *  PagedBytes instance. */
+  public PagedBytesDataInput getDataInput() {
+    if (!frozen) {
+      throw new IllegalStateException("must call freeze() before getDataInput");
+    }
+    return new PagedBytesDataInput();
+  }
+
+  /** Returns a DataOutput that you may use to write into
+   *  this PagedBytes instance.  If you do this, you should
+   *  not call the other writing methods (eg, copy);
+   *  results are undefined. */
+  public PagedBytesDataOutput getDataOutput() {
+    if (frozen) {
+      throw new IllegalStateException("cannot get DataOutput after freeze()");
+    }
+    return new PagedBytesDataOutput();
+  }
 }
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java	(revision 1482419)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java	(working copy)
@@ -36,15 +36,17 @@
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
+import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 import org.apache.lucene.util.packed.PackedInts;
-import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
+
 /**
  * Writer for {@link Lucene42DocValuesFormat}
 */
@@ -216,6 +218,9 @@
     int maxLength = Integer.MIN_VALUE;
     final long startFP = data.getFilePointer();
     for(BytesRef v : values) {
+      if (v.length > (BYTE_BLOCK_SIZE - 2)) {
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
+      }
       minLength = Math.min(minLength, v.length);
       maxLength = Math.max(maxLength, v.length);
       data.writeBytes(v.bytes, v.offset, v.length);
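Note (illustrative arithmetic, not in the patch): the 32766-byte limit that the
Lucene40/Lucene42 checks above enforce, and that the FieldInfo javadocs below
cite, is ByteBlockPool.BYTE_BLOCK_SIZE - 2:

  int byteBlockSize = 1 << 15;             // ByteBlockPool.BYTE_BLOCK_SIZE == 32768
  int maxLegacyLength = byteBlockSize - 2; // == 32766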
Index: lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java	(revision 1482419)
+++ lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java	(working copy)
@@ -22,28 +22,42 @@
 import java.util.NoSuchElementException;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
-import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.AppendingLongBuffer;
 
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
-
-
 /** Buffers up pending byte[] per doc, then flushes when
  *  segment flushes. */
 class BinaryDocValuesWriter extends DocValuesWriter {
 
-  private final ByteBlockPool pool;
+  /** Maximum length for a binary field; we set this to "a
+   *  bit" below Integer.MAX_VALUE because the exact max
+   *  allowed byte[] is JVM dependent, so we want to avoid
+   *  a case where a large value worked in one JVM but
+   *  failed later at search time with a different JVM. */
+  private static final int MAX_LENGTH = Integer.MAX_VALUE-256;
+
+  // 32 KB block sizes for PagedBytes storage:
+  private final static int BLOCK_BITS = 15;
+
+  private final PagedBytes bytes;
+  private final DataOutput bytesOut;
+
+  private final Counter iwBytesUsed;
   private final AppendingLongBuffer lengths;
   private final FieldInfo fieldInfo;
-  private int addedValues = 0;
+  private int addedValues;
+  private long bytesUsed;
 
   public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
-    this.pool = new ByteBlockPool(new DirectTrackingAllocator(iwBytesUsed));
+    this.bytes = new PagedBytes(BLOCK_BITS);
+    this.bytesOut = bytes.getDataOutput();
     this.lengths = new AppendingLongBuffer();
+    this.iwBytesUsed = iwBytesUsed;
   }
 
   public void addValue(int docID, BytesRef value) {
@@ -53,10 +67,10 @@
     if (value == null) {
       throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\": null value not allowed");
     }
-    if (value.length > (BYTE_BLOCK_SIZE - 2)) {
-      throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2));
+    if (value.length > MAX_LENGTH) {
+      throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
     }
-    
+
     // Fill in any holes:
     while(addedValues < docID) {
       addedValues++;
@@ -64,9 +78,21 @@
     }
     addedValues++;
     lengths.add(value.length);
-    pool.append(value);
+    try {
+      bytesOut.writeBytes(value.bytes, value.offset, value.length);
+    } catch (IOException ioe) {
+      // Should never happen!
+      throw new RuntimeException(ioe);
+    }
+    updateBytesUsed();
   }
 
+  private void updateBytesUsed() {
+    final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed();
+    iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
+    bytesUsed = newBytesUsed;
+  }
+
   @Override
   public void finish(int maxDoc) {
   }
@@ -74,6 +100,7 @@
   @Override
   public void flush(SegmentWriteState state, DocValuesConsumer dvConsumer) throws IOException {
     final int maxDoc = state.segmentInfo.getDocCount();
+    bytes.freeze(false);
     dvConsumer.addBinaryField(fieldInfo,
                               new Iterable<BytesRef>() {
                                 @Override
@@ -91,10 +118,10 @@
   private class BytesIterator implements Iterator<BytesRef> {
     final BytesRef value = new BytesRef();
     final AppendingLongBuffer.Iterator lengthsIterator = lengths.iterator();
+    final DataInput bytesIterator = bytes.getDataInput();
     final int size = (int) lengths.size();
     final int maxDoc;
     int upto;
-    long byteOffset;
 
     BytesIterator(int maxDoc) {
       this.maxDoc = maxDoc;
@@ -114,8 +141,12 @@
       int length = (int) lengthsIterator.next();
       value.grow(length);
       value.length = length;
-      pool.readBytes(byteOffset, value.bytes, value.offset, value.length);
-      byteOffset += length;
+      try {
+        bytesIterator.readBytes(value.bytes, value.offset, value.length);
+      } catch (IOException ioe) {
+        // Should never happen!
+        throw new RuntimeException(ioe);
+      }
     } else {
       // This is to handle last N documents not having
       // this DV field in the end of the segment:
Index: lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/FieldInfo.java	(revision 1482419)
+++ lucene/core/src/java/org/apache/lucene/index/FieldInfo.java	(working copy)
@@ -92,21 +92,22 @@
      */
     NUMERIC,
     /**
-     * A per-document byte[].
+     * A per-document byte[].  Values may be larger than
+     * 32766 bytes, but different codecs may enforce their own limits.
      */
     BINARY,
     /**
      * A pre-sorted byte[]. Fields with this type only store distinct byte values
      * and store an additional offset pointer per document to dereference the shared
      * byte[]. The stored byte[] is presorted and allows access via document id,
-     * ordinal and by-value.
+     * ordinal and by-value.  Values must be <= 32766 bytes.
      */
     SORTED,
     /**
     * A pre-sorted Set&lt;byte[]&gt;. Fields with this type only store distinct byte values
      * and store additional offset pointers per document to dereference the shared
      * byte[]s. The stored byte[] is presorted and allows access via document id,
-     * ordinal and by-value.
+     * ordinal and by-value.  Values must be <= 32766 bytes.
      */
     SORTED_SET
   };
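Note (consumer-side sketch, not in the patch): reading a large binary doc value
back at search time is unchanged by this patch. The field name "blob" and the
helper are hypothetical; getBinaryDocValues returns null if the field has no
binary doc values, so a real caller should check for that:

  static byte[] readBlob(AtomicReader reader, int docID) throws IOException {
    BinaryDocValues dv = reader.getBinaryDocValues("blob");
    BytesRef scratch = new BytesRef();
    dv.get(docID, scratch);
    return Arrays.copyOfRange(scratch.bytes, scratch.offset, scratch.offset + scratch.length);
  }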