Index: lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java (revision 1480640) +++ lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java (working copy) @@ -35,6 +35,8 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.PackedInts; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; + class Lucene40DocValuesWriter extends DocValuesConsumer { private final Directory dir; private final SegmentWriteState state; @@ -156,6 +158,9 @@ int minLength = Integer.MAX_VALUE; int maxLength = Integer.MIN_VALUE; for (BytesRef b : values) { + if (b.length > (BYTE_BLOCK_SIZE - 2)) { + throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2)); + } minLength = Math.min(minLength, b.length); maxLength = Math.max(maxLength, b.length); if (uniqueValues != null) { Index: lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java (revision 1480640) +++ lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; @@ -29,12 +31,14 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.search.FieldCache; import 
org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; /** * @@ -326,29 +330,96 @@ iwriter.close(); directory.close(); } + + // nocommit also add explicit test for Facet42DVConsumer (it's + // not in the random LTC rotation) public void testTooLargeBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); + Directory d = newDirectory(); + boolean doFixed = random().nextBoolean(); + int numDocs; + int fixedLength = 0; + if (doFixed) { + // Sometimes make all values fixed length since some + // codecs have different code paths for this: + numDocs = _TestUtil.nextInt(random(), 10, 20); + fixedLength = _TestUtil.nextInt(random(), 65537, 256*1024); + } else { + numDocs = _TestUtil.nextInt(random(), 100, 200); + } + IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + List<byte[]> docBytes = new ArrayList<byte[]>(); + long totalBytes = 0; + for(int docID=0;docID<numDocs;docID++) { + // Sometimes make a BytesRef > 64KB in size to ensure more than 2 pages in + // PagedBytes would be needed: + int numBytes; + if (doFixed) { + numBytes = fixedLength; + } else if (docID == 0 || random().nextInt(5) == 3) { + numBytes = _TestUtil.nextInt(random(), 65537, 3*1024*1024); + } else { + numBytes = _TestUtil.nextInt(random(), 1, 1024*1024); + } + totalBytes += numBytes; + if (totalBytes > 5 * 1024*1024) { + break; + } + byte[] bytes = new byte[numBytes]; + random().nextBytes(bytes); + docBytes.add(bytes); + Document doc = new Document(); + BytesRef b = new BytesRef(bytes); + b.length = bytes.length; + doc.add(new BinaryDocValuesField("field", b)); + doc.add(new StringField("id", ""+docID, Field.Store.YES)); + try { + w.addDocument(doc); + } catch (IllegalArgumentException iae) { + if (iae.getMessage().indexOf("is too large") == -1) { + throw iae; + } else { + // OK: some codecs can't handle binary DV > 32K + w.rollback(); + d.close(); + return; + } + } 
+ } + + DirectoryReader r; try { - iwriter.addDocument(doc); - fail("did not get expected exception"); - } catch (IllegalArgumentException expected) { - // expected + r = w.getReader(); + } catch (IllegalArgumentException iae) { + if (iae.getMessage().indexOf("is too large") == -1) { + throw iae; + } else { + // OK: some codecs can't handle binary DV > 32K + w.rollback(); + d.close(); + return; + } } - iwriter.close(); + w.close(); - directory.close(); + AtomicReader ar = SlowCompositeReaderWrapper.wrap(r); + + BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field"); + for(int docID=0;docID (BYTE_BLOCK_SIZE - 2)) { + throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2)); + } minLength = Math.min(minLength, v.length); maxLength = Math.max(maxLength, v.length); data.writeBytes(v.bytes, v.offset, v.length); Index: lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (revision 1480640) +++ lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java (working copy) @@ -28,9 +28,6 @@ import org.apache.lucene.util.Counter; import org.apache.lucene.util.packed.AppendingLongBuffer; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; - - /** Buffers up pending byte[] per doc, then flushes when * segment flushes. 
*/ class BinaryDocValuesWriter extends DocValuesWriter { @@ -53,9 +50,6 @@ if (value == null) { throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\": null value not allowed"); } - if (value.length > (BYTE_BLOCK_SIZE - 2)) { - throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + (BYTE_BLOCK_SIZE - 2)); - } // Fill in any holes: while(addedValues < docID) { Index: lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java (revision 1480640) +++ lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java (working copy) @@ -345,7 +345,7 @@ System.arraycopy(buffer, pos, bytes, bytesOffset, bytesLength); break; } else { - final int bytesToCopy = length - overflow; + final int bytesToCopy = Math.min(length - overflow, BYTE_BLOCK_SIZE); System.arraycopy(buffer, pos, bytes, bytesOffset, bytesToCopy); pos = 0; bytesLength -= bytesToCopy;