Index: lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java (revision 1361692) +++ lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java (working copy) @@ -25,8 +25,11 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.store.Directory; +import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.Token; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.util.Constants; @@ -93,6 +96,24 @@ assertTrue(checker.checkIndex(onlySegments).clean == true); dir.close(); } + + // LUCENE-4221: we have to let these thru, for now + public void testBogusTermVectors() throws IOException { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + Field field = new Field("foo", "", ft); + field.setTokenStream(new CannedTokenStream( + new Token("bar", 5, 10), new Token("bar", 1, 4) + )); + doc.add(field); + iw.addDocument(doc); + iw.close(); + dir.close(); // checkindex + } public void testLuceneConstantVersion() throws IOException { // common-build.xml sets lucene.version Index: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1361692) +++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -668,7 +668,7 @@ * checks Fields api is consistent with itself. * searcher is optional, to verify with queries. Can be null. */ - private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint) throws IOException { + private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint, boolean isVectors) throws IOException { // TODO: we should probably return our own stats thing...?! final Status.TermIndexStatus status = new Status.TermIndexStatus(); @@ -863,18 +863,22 @@ if (hasOffsets) { int startOffset = postings.startOffset(); int endOffset = postings.endOffset(); - if (startOffset < 0) { - throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds"); + // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before? + // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter + if (!isVectors) { + if (startOffset < 0) { + throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds"); + } + if (startOffset < lastOffset) { + throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset); + } + if (endOffset < 0) { + throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds"); + } + if (endOffset < startOffset) { + throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset); + } } - if (startOffset < lastOffset) { - throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset); - } - if (endOffset < 0) { - throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds"); - } - if (endOffset < startOffset) { - throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset); - } lastOffset = startOffset; } } @@ -956,18 +960,22 @@ if (hasOffsets) { int startOffset = postings.startOffset(); int endOffset = postings.endOffset(); - if (startOffset < 0) { - throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds"); + // NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before? + // but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter + if (!isVectors) { + if (startOffset < 0) { + throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds"); + } + if (startOffset < lastOffset) { + throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset); + } + if (endOffset < 0) { + throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds"); + } + if (endOffset < startOffset) { + throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset); + } } - if (startOffset < lastOffset) { - throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset); - } - if (endOffset < 0) { - throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds"); - } - if (endOffset < startOffset) { - throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset); - } lastOffset = startOffset; } } @@ -1193,12 +1201,12 @@ } final Fields fields = reader.fields(); - status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true); + status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true, false); if (liveDocs != null) { if (infoStream != null) { infoStream.print(" test (ignoring deletes): terms, freq, prox..."); } - checkFields(fields, null, maxDoc, fieldInfos, true); + checkFields(fields, null, maxDoc, fieldInfos, true, false); } } catch (Throwable e) { msg("ERROR: " + e); @@ -1415,10 +1423,10 @@ if (tfv != null) { // First run with no deletions: - checkFields(tfv, null, 1, fieldInfos, false); + checkFields(tfv, null, 1, fieldInfos, false, true); // Again, with the one doc deleted: - checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false); + checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true); // Only agg stats if the doc is live: final boolean doStats = liveDocs == null || liveDocs.get(j);