Index: src/test/org/apache/lucene/index/TestStressIndexing2.java =================================================================== --- src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 892408) +++ src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy) @@ -64,6 +64,8 @@ IndexReader r = dw.writer.getReader(); dw.writer.commit(); verifyEquals(r, dir, "id"); + FlexTestUtil.verifyFlexVsPreFlex(this.r, r); + FlexTestUtil.verifyFlexVsPreFlex(this.r, dir); r.close(); dw.writer.close(); dir.close(); @@ -83,11 +85,15 @@ // verifyEquals(dir2, dir2, "id"); verifyEquals(dir1, dir2, "id"); + FlexTestUtil.verifyFlexVsPreFlex(r, dir1); + FlexTestUtil.verifyFlexVsPreFlex(r, dir2); } public void testMultiConfig() throws Throwable { // test lots of smaller different params together + r = newRandom(); + for (int i=0; i<20; i++) { // increase iterations for better testing sameFieldOrder=r.nextBoolean(); mergeFactor=r.nextInt(3)+2; @@ -106,6 +112,9 @@ indexSerial(docs, dir2); //System.out.println("TEST: verify"); verifyEquals(dir1, dir2, "id"); + + FlexTestUtil.verifyFlexVsPreFlex(r, dir1); + FlexTestUtil.verifyFlexVsPreFlex(r, dir2); } } @@ -206,7 +215,6 @@ threads[i].join(); } - // nocommit -- comment out again //w.optimize(); w.close(); @@ -666,5 +674,4 @@ } } } - } Index: src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- src/test/org/apache/lucene/index/TestCodecs.java (revision 892412) +++ src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -26,6 +26,7 @@ // nocommit -- test multiple codecs here? 
// TODO +// - test across fields // - fix this test to run once for all codecs // - make more docs per term, to test > 1 level skipping // - test all combinations of payloads/not and omitTF/not Index: src/test/org/apache/lucene/index/TestOmitTf.java =================================================================== --- src/test/org/apache/lucene/index/TestOmitTf.java (revision 892408) +++ src/test/org/apache/lucene/index/TestOmitTf.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.Random; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -92,13 +93,18 @@ f2.setOmitTermFreqAndPositions(false); d.add(f2); + Random rnd = newRandom(); + writer.addDocument(d); // force merge + FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); writer.optimize(); // flush writer.close(); _TestUtil.checkIndex(ram); + FlexTestUtil.verifyFlexVsPreFlex(rnd, ram); + SegmentReader reader = SegmentReader.getOnlySegmentReader(ram); FieldInfos fi = reader.fieldInfos(); assertTrue("OmitTermFreqAndPositions field bit should be set.", fi.fieldInfo("f1").omitTermFreqAndPositions); @@ -144,8 +150,12 @@ for(int i=0;i<30;i++) writer.addDocument(d); + Random rnd = newRandom(); + FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); + // force merge writer.optimize(); + FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); // flush writer.close(); Index: src/test/org/apache/lucene/index/TestIndexReaderReopen.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexReaderReopen.java (revision 892408) +++ src/test/org/apache/lucene/index/TestIndexReaderReopen.java (working copy) @@ -860,6 +860,8 @@ assertReaderClosed(reader, true, true); assertReaderClosed(firstReader, true, true); + FlexTestUtil.verifyFlexVsPreFlex(rnd, dir); + dir.close(); } Index: src/test/org/apache/lucene/index/TestStressIndexing.java 
=================================================================== --- src/test/org/apache/lucene/index/TestStressIndexing.java (revision 892408) +++ src/test/org/apache/lucene/index/TestStressIndexing.java (working copy) @@ -153,6 +153,8 @@ modifier.close(); + FlexTestUtil.verifyFlexVsPreFlex(RANDOM, directory); + for(int i=0;i commitUserData) throws IOException { - r.doCommit(commitUserData); - } - - protected void doClose() throws IOException { - r.doClose(); - } - - public Collection getFieldNames(FieldOption fldOption) { - return r.getFieldNames(fldOption); - } - } - public void testExternalReader() throws Exception { Directory d = new MockRAMDirectory(); @@ -144,7 +44,7 @@ w.addDocument(doc); } - IndexReader r = new ExternalReader(w.getReader()); + IndexReader r = new FlexTestUtil.ForcedExternalReader(w.getReader()); TermRef field1Term = new TermRef("field1"); TermRef field2Term = new TermRef("field2"); Index: src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 892408) +++ src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -26,6 +26,7 @@ import java.io.DataInputStream; import java.io.OutputStream; import java.util.Arrays; +import java.util.Random; import java.util.Enumeration; import java.util.List; import java.util.ArrayList; @@ -65,11 +66,11 @@ // oldNames array. 
/* - public void testCreatePreLocklessCFS() throws IOException { + public void xxxtestCreatePreLocklessCFS() throws IOException { createIndex("index.cfs", true); } - public void testCreatePreLocklessNoCFS() throws IOException { + public void xxxtestCreatePreLocklessNoCFS() throws IOException { createIndex("index.nocfs", false); } */ @@ -110,13 +111,13 @@ zipFile.close(); } - public void testCreateCFS() throws IOException { + public void xxxtestCreateCFS() throws IOException { String dirName = "testindex.cfs"; createIndex(dirName, true); rmDir(dirName); } - public void testCreateNoCFS() throws IOException { + public void xxxtestCreateNoCFS() throws IOException { String dirName = "testindex.nocfs"; createIndex(dirName, true); rmDir(dirName); @@ -203,15 +204,20 @@ } } - public void testOptimizeOldIndex() throws IOException { + public void testOptimizeOldIndex() throws Exception { int hasTested29 = 0; + + Random rand = newRandom(); for(int i=0;i allTerms = new ArrayList(); + //System.out.println("TEST: now verify!!"); + testStraightEnum(r); + testRandomSkips(rand, r); + testRandomSeeks(rand, r); + } + + private static void testStraightEnum(IndexReader r) throws Exception { + + // straight enum of fields/terms/docs/positions + TermEnum termEnum = r.terms(); + FieldsEnum fields = r.fields().iterator(); + while(true) { + final String field = fields.next(); + if (field == null) { + boolean result = termEnum.next(); + if (result) { + System.out.println("got unexpected term=" + termEnum.term() + " termEnum=" + termEnum); + } + assertFalse(result); + break; + } + TermsEnum terms = fields.terms(); + final TermPositions termPos = r.termPositions(); + while(true) { + final TermRef termRef = terms.next(); + if (termRef == null) { + break; + } else { + assertTrue(termEnum.next()); + Term t = termEnum.term(); + assertEquals(t.field(), field); + assertEquals(t.text(), termRef.toString()); + assertEquals(termEnum.docFreq(), terms.docFreq()); + //allTerms.add(t); + + DocsEnum docs = 
terms.docs(r.getDeletedDocs()); + termPos.seek(t); + while(true) { + final int doc = docs.nextDoc(); + if (doc == DocsEnum.NO_MORE_DOCS) { + assertFalse(termPos.next()); + break; + } else { + assertTrue(termPos.next()); + assertEquals(termPos.doc(), doc); + assertEquals(termPos.freq(), docs.freq()); + //System.out.println("TEST: doc=" + doc + " freq=" + docs.freq()); + final int freq = docs.freq(); + PositionsEnum pos = docs.positions(); + for(int i=0;i commitUserData) throws IOException { + r.doCommit(commitUserData); + } + + protected void doClose() throws IOException { + r.doClose(); + } + + public Collection getFieldNames(FieldOption fldOption) { + return r.getFieldNames(fldOption); + } + } + + public static void main(String[] args) throws Exception { + Directory dir = FSDirectory.open(new File("/x/lucene/wiki.5M/index")); + verifyFlexVsPreFlex(new Random(), dir); + dir.close(); + } +} \ No newline at end of file Property changes on: src/test/org/apache/lucene/index/FlexTestUtil.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 892408) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -92,6 +92,7 @@ } subs[i] = subReaders[i].getDeletedDocs(); } + starts[subReaders.length] = maxDoc; if (hasDeletions) { deletedDocs = new MultiBits(subs, starts); @@ -154,8 +155,12 @@ } @Override - public Bits getDeletedDocs() { - return deletedDocs; + public Bits getDeletedDocs() throws IOException { + if (subReaders.length == 1) { + return subReaders[0].getDeletedDocs(); + } else { + return deletedDocs; + } } /** Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 892408) +++ 
src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -41,6 +41,8 @@ return atts; } + // nocommit -- state in API that doc/freq are undefined + // (defined?) after this? // nocommit -- fix this API so that intblock codecs are // able to return their own int arrays, to save a copy /** Bulk read: returns number of docs read. Subclass may Index: src/java/org/apache/lucene/index/LegacyFieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFieldsEnum.java (revision 892408) +++ src/java/org/apache/lucene/index/LegacyFieldsEnum.java (working copy) @@ -22,32 +22,40 @@ /** Implements flex API (FieldsEnum/TermsEnum) on top of * pre-flex API. Used only for IndexReader impls outside - * Lucene's core. */ + * Lucene's core. + * + * @deprecated Migrate the external reader to the flex API */ +@Deprecated class LegacyFieldsEnum extends FieldsEnum { private final IndexReader r; private TermEnum terms; private String field; + private boolean init; public LegacyFieldsEnum(IndexReader r) throws IOException { this.r = r; terms = r.terms(); + init = true; } - private void doSeek(Term t) throws IOException { - terms.close(); - terms = r.terms(t); - } - @Override public String next() throws IOException { if (field != null) { - final Term seekTo = new Term(field, "\uFFFF"); - doSeek(seekTo); + terms.close(); + // jump to end of the current field: + terms = r.terms(new Term(field, "\uFFFF")); + assert terms.term() == null || !terms.term().field.equals(field); } + if (init) { + init = false; + if (!terms.next()) { + return null; + } + } if (terms.term() != null) { String newField = terms.term().field; - assert !newField.equals(field); + assert field == null || !newField.equals(field); field = newField; return field; } else { @@ -66,11 +74,12 @@ private TermEnum terms; private TermRef current; private final TermRef tr = new TermRef(); + private final LegacyDocsEnum docsEnum; LegacyTermsEnum(IndexReader r, 
String field) throws IOException { this.r = r; this.field = field; - this.terms = r.terms(new Term(field, "")); + docsEnum = new LegacyDocsEnum(r, field); } @Override @@ -81,11 +90,9 @@ @Override public SeekStatus seek(TermRef text) throws IOException { - - // nocommit -- should we optimize for "silly seek" - // cases, here? ie seek to term you're already on, to - // very next term , etc. - terms.close(); + if (terms != null) { + terms.close(); + } terms = r.terms(new Term(field, text.toString())); final Term t = terms.term(); @@ -117,16 +124,23 @@ @Override public TermRef next() throws IOException { - if (terms.next()) { + if (terms == null) { + // first next -- seek to start of field + terms = r.terms(new Term(field, "")); + if (terms.term() == null) { + return null; + } else { + tr.copy(terms.term().text()); + return current = tr; + } + } else if (terms.next()) { if (terms.term().field == field) { tr.copy(terms.term().text()); - current = tr; + return current = tr; } else { - current = null; + return null; } - return current; } else { - current = null; return null; } } @@ -136,12 +150,6 @@ return current; } - /* - public String text() { - return terms.term().text; - } - */ - @Override public int docFreq() { return terms.docFreq(); @@ -149,7 +157,8 @@ @Override public DocsEnum docs(Bits skipDocs) throws IOException { - return new LegacyDocsEnum(r, field, terms.term(), skipDocs); + docsEnum.reset(terms.term(), skipDocs); + return docsEnum; } public void close() throws IOException { @@ -159,30 +168,39 @@ // Emulates flex on top of legacy API private static class LegacyDocsEnum extends DocsEnum { - final TermDocs td; - final Term term; - final IndexReader r; - final String field; - final Bits skipDocs; + private final IndexReader r; + private final String field; + private final TermPositions tp; + private final LegacyPositionsEnum posEnum; - TermPositions tp; - int doc = -1; + private Term term; - LegacyDocsEnum(IndexReader r, String field, Term term, Bits skipDocs) 
throws IOException { + private int doc = -1; + + LegacyDocsEnum(IndexReader r, String field) throws IOException { this.r = r; this.field = field; + tp = r.termPositions(); + posEnum = new LegacyPositionsEnum(tp); + } + + public void reset(Term term, Bits skipDocs) throws IOException { this.term = term; - td = r.termDocs(term); - this.skipDocs = skipDocs; + tp.seek(term); + + if (skipDocs != r.getDeletedDocs()) { + // An external reader's TermDocs/Positions will + // silently skip deleted docs, so, we can't allow + // arbitrary skipDocs here: + //System.out.println("skipDocs=" + skipDocs + " vs " + r.getDeletedDocs()); + throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + } } - // nocommit -- must enforce skipDocs... but old API will - // always secretly skip deleted docs, and we can't work - // around that for external readers? @Override public int nextDoc() throws IOException { - if (td.next()) { - return doc = td.doc(); + if (tp.next()) { + return doc = tp.doc(); } else { return doc = NO_MORE_DOCS; } @@ -190,8 +208,8 @@ @Override public int advance(int target) throws IOException { - if (td.skipTo(target)) { - return doc = td.doc(); + if (tp.skipTo(target)) { + return doc = tp.doc(); } else { return doc = NO_MORE_DOCS; } @@ -199,7 +217,7 @@ @Override public int freq() { - return td.freq(); + return tp.freq(); } @Override @@ -207,27 +225,18 @@ return doc; } - @Override - public int read(int[] docs, int[] freqs) throws IOException { - return td.read(docs, freqs); - } - public void close() throws IOException { - td.close(); + tp.close(); } - LegacyPositionsEnum lpe; - @Override public PositionsEnum positions() throws IOException { - if (tp == null) { - tp = r.termPositions(term); - lpe = new LegacyPositionsEnum(tp); - } else { - tp.seek(term); - } - return lpe; + return posEnum; } + + // NOTE: we don't override bulk-read (docs & freqs) API + // -- leave it to base class, because TermPositions + // can't do bulk 
read } // Emulates flex on top of legacy API Index: src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/DirectoryReader.java (revision 892408) +++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -366,24 +366,30 @@ private MultiBits deletedDocs; - // Exposes a slice of an existing Bits as a new Bits - final static class SubBits implements Bits { + // Exposes a slice of an existing Bits as a new Bits. + // Only used when one provides an external skipDocs (ie, + // not the del docs from this DirectoryReader), to pull + // the DocsEnum of the sub readers + private final static class SubBits implements Bits { private final Bits parent; private final int start; private final int length; // start is inclusive; end is exclusive (length = end-start) - public SubBits(Bits parent, int start, int end) { + public SubBits(Bits parent, int start, int length) { this.parent = parent; this.start = start; - this.length = end - start; + this.length = length; + System.out.println("new sub-bits " + start + " " + length); + assert length >= 0: "length=" + length; } public boolean get(int doc) { if (doc >= length) { throw new RuntimeException("doc " + doc + " is out of bounds 0 .. 
" + (length-1)); } - return parent.get(doc-start); + assert doc < length: "doc=" + doc + " length=" + length; + return parent.get(doc+start); } } @@ -392,6 +398,7 @@ // should return null from getDeletedDocs: static final class MultiBits implements Bits { private final Bits[] subs; + // this is 1+subs.length, ie the last entry has the maxDoc final int[] starts; public MultiBits(Bits[] subs, int[] starts) { @@ -405,6 +412,8 @@ if (bits == null) { return false; } else { + final int length = starts[1+reader]-starts[reader]; + assert doc - starts[reader] < length: "doc=" + doc + " reader=" + reader + " starts[reader]=" + starts[reader] + " length=" + length; return bits.get(doc-starts[reader]); } } @@ -1144,12 +1153,13 @@ Terms terms; int base; int length; - Bits deletedDocs; + Bits skipDocs; public TermsWithBase(IndexReader reader, int base, String field) throws IOException { this.base = base; length = reader.maxDoc(); - deletedDocs = reader.getDeletedDocs(); + assert length >= 0: "length=" + length; + skipDocs = reader.getDeletedDocs(); terms = reader.fields().terms(field); } } @@ -1159,37 +1169,40 @@ String current; int base; int length; - Bits deletedDocs; + Bits skipDocs; public FieldsEnumWithBase(IndexReader reader, int base) throws IOException { this.base = base; length = reader.maxDoc(); - deletedDocs = reader.getDeletedDocs(); - fields = reader.fields().iterator(); + assert length >= 0: "length=" + length; + skipDocs = reader.getDeletedDocs(); + fields = reader.fields().iterator(); } } private final static class TermsEnumWithBase { - TermsEnum terms; - int base; - int length; + final TermsEnum terms; + final int base; + final int length; TermRef current; - Bits deletedDocs; + final Bits skipDocs; public TermsEnumWithBase(FieldsEnumWithBase start, TermsEnum terms, TermRef term) { this.terms = terms; current = term; - deletedDocs = start.deletedDocs; + skipDocs = start.skipDocs; base = start.base; length = start.length; + assert length >= 0: "length=" + length; 
} public TermsEnumWithBase(TermsWithBase start, TermsEnum terms, TermRef term) { this.terms = terms; current = term; - deletedDocs = start.deletedDocs; + skipDocs = start.skipDocs; base = start.base; length = start.length; + assert length >= 0: "length=" + length; } } @@ -1226,6 +1239,8 @@ } } + // Exposes flex API, merged from flex API of + // sub-segments. final static class MultiFields extends Fields { private final IndexReader[] readers; private final int[] starts; @@ -1250,9 +1265,10 @@ MultiTerms result = terms.get(field); if (result == null) { + // First time this field is requested, we create & add to terms: List subs = new ArrayList(); - // Gather all sub-readers that have this field + // Gather all sub-readers that share this field for(int i=0;i 0; return currentField; } @@ -1335,7 +1363,7 @@ if (top[i].current != null) { queue.add(top[i]); } else { - // no more fields in this reader + // no more fields in this sub-reader } } @@ -1363,7 +1391,9 @@ } } - // Exposes flex API, merged from flex API of sub-segments + // Exposes flex API, merged from flex API of + // sub-segments. This does a merge sort, by term text, of + // the sub-readers. 
private static final class MultiTermsEnum extends TermsEnum { private final TermMergeQueue queue; @@ -1404,7 +1434,12 @@ if (termComp == null) { queue.termComp = termComp = termsEnum.getTermComparator(); } else { - assert termsEnum.getTermComparator() == null || termComp.equals(termsEnum.getTermComparator()); + // We cannot merge sub-readers that have + // different TermComps + final TermRef.Comparator subTermComp = termsEnum.getTermComparator(); + if (subTermComp != null && !subTermComp.equals(termComp)) { + throw new IllegalStateException("sub-readers have different TermRef.Comparators; cannot merge"); + } } final TermRef term = termsEnum.next(); if (term != null) { @@ -1488,6 +1523,8 @@ } private final void pullTop() { + // extract all subs from the queue that have the same + // top term assert numTop == 0; while(true) { top[numTop++] = queue.pop(); @@ -1499,6 +1536,7 @@ } private final void pushTop() throws IOException { + // call next() on each top, and put back into queue for(int i=0;i= 0: "subs[" + i + " of " + numSubs + "].length=" + subs[i].length; + + // Optimize for common case: requested skip docs is + // simply our (DirectoryReader's) deleted docs. 
In + // this case, we just pull the skipDocs from the sub + // reader, rather than making the inefficient + // Sub(Multi(sub-readers)): if (skipDocs instanceof MultiBits) { MultiBits multiBits = (MultiBits) skipDocs; int reader = ReaderUtil.subIndex(subs[i].base, multiBits.starts); - // System.out.println("bits=" + multiBits + " starts=" + multiBits.starts + " subs=" + subs + " subs[i]=" + subs[i] + " subs[1+i]=" + subs[1+i] + " i=" + i + " numSubs=" + numSubs); + assert reader < multiBits.starts.length-1: " reader=" + reader + " multiBits.starts.length=" + multiBits.starts.length; + final int length = multiBits.starts[reader+1] - multiBits.starts[reader]; if (multiBits.starts[reader] == subs[i].base && - (i == numSubs-1 || - reader == multiBits.starts.length-1 || - multiBits.starts[1+reader] == subs[1+i].base)) { + length == subs[i].length) { bits = multiBits.subs[reader]; handled = true; } @@ -1698,8 +1739,9 @@ if (t != null) { termEnum = reader.terms(t); - } else + } else { termEnum = reader.terms(); + } LegacySegmentMergeInfo smi = new LegacySegmentMergeInfo(starts[i], termEnum, reader); smi.ord = i; Index: src/java/org/apache/lucene/index/FieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/FieldsEnum.java (revision 892408) +++ src/java/org/apache/lucene/index/FieldsEnum.java (working copy) @@ -21,7 +21,8 @@ import org.apache.lucene.util.AttributeSource; -/** Enumerates indexed fields. +/** Enumerates indexed fields. You must first call {@link + * #next} before calling {@link #terms}. * * NOTE: this API is experimental and will likely change */ @@ -38,14 +39,20 @@ } // nocommit -- do we need seek? - - /** Increments the enumeration to the next field. - * Returns null when there are no more fields.*/ + // nocommit -- should this return FieldInfo? + /** Increments the enumeration to the next field. The + * returned field is always interned, so simple == + * comparison is allowed. 
Returns null when there are no + * more fields.*/ public abstract String next() throws IOException; - /** Get TermsEnum for the current field. You should not - * call {@link #next()} until you're done using this - * TermsEnum. */ + // nocommit should we add a field()? fieldInfo()? + // mirrors TermsEnum + + /** Get {@link TermsEnum} for the current field. You + * should not call {@link #next()} until you're done + * using this {@link TermsEnum}. After {@link #next} + * returns null, this method should not be called. */ public abstract TermsEnum terms() throws IOException; } Index: src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/TermsEnum.java (revision 892408) +++ src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -98,10 +98,10 @@ * are done using the DocsEnum. */ public abstract DocsEnum docs(Bits skipDocs) throws IOException; - /** Return the TermRef Comparator used to sort terms - * provided by the iterator. NOTE: this may return null - * if there are no terms. This method may be invoked - * many times; it's best to cache a single instance & - * reuse it. */ + /** Return the {@link TermRef} Comparator used to sort + * terms provided by the iterator. NOTE: this may return + * null if there are no terms. Callers may invoke this + * method many times, so it's best to cache a single + * instance & reuse it. 
*/ public abstract TermRef.Comparator getTermComparator() throws IOException; } Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 892408) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -813,7 +813,7 @@ // direct access to old: return ((PreFlexFields) core.fields).tis.terms(); } else { - // Emulate old API on top of new index + // Emulate pre-flex API on top of flex index return new LegacyTermEnum(null); } } @@ -829,7 +829,7 @@ // direct access to old: return ((PreFlexFields) core.fields).tis.terms(t); } else { - // Emulate old API on top of new index + // Emulate pre-flex API on top of flex index return new LegacyTermEnum(t); } } @@ -875,7 +875,9 @@ // converting old API -> new API -> old API, just give // direct access to old: final PreFlexFields pre = (PreFlexFields) core.fields; - return new SegmentTermDocs(pre.freqStream, deletedDocs, pre.tis, core.fieldInfos); + SegmentTermDocs std = new SegmentTermDocs(pre.freqStream, pre.tis, core.fieldInfos); + std.setSkipDocs(deletedDocs); + return std; } else { // Emulate old API return new LegacyTermDocs(); @@ -892,7 +894,9 @@ // converting old API -> new API -> old API, just give // direct access to old: final PreFlexFields pre = (PreFlexFields) core.fields; - return new SegmentTermPositions(pre.freqStream, pre.proxStream, deletedDocs, pre.tis, core.fieldInfos); + SegmentTermPositions stp = new SegmentTermPositions(pre.freqStream, pre.proxStream, pre.tis, core.fieldInfos); + stp.setSkipDocs(deletedDocs); + return stp; } else // Emulate old API return new LegacyTermPositions(); @@ -1295,7 +1299,7 @@ return core.termsIndexDivisor; } - // Back compat: legacy TermEnum API over flex API + // Back compat: pre-flex TermEnum API over flex API final private class LegacyTermEnum extends TermEnum { FieldsEnum fields; TermsEnum terms; @@ -1304,7 +1308,6 @@ TermRef 
currentTerm; public LegacyTermEnum(Term t) throws IOException { - // System.out.println("sr.lte.init: term=" + t); fields = core.fields.iterator(); currentField = fields.next(); if (currentField == null) { @@ -1319,7 +1322,7 @@ while(currentField.compareTo(t.field) < 0) { currentField = fields.next(); if (currentField == null) { - // Didn't find the field + // Hit end of fields done = true; break; } @@ -1329,10 +1332,12 @@ // We found some field -- get its terms: terms = fields.terms(); - if (currentField.equals(t.field)) { + // nocommit: confirm interning is working! + if (currentField == t.field) { // We found exactly the requested field; now // seek the term text: String text = t.text(); + // this is only for backwards compatibility. // previously you could supply a term with unpaired surrogates, // and it would return the next Term. @@ -1340,6 +1345,7 @@ // this emulates the old behavior, and forms "valid UTF-8" unicode. TermRef tr = new TermRef(UnicodeUtil.nextValidUTF16String(text)); TermsEnum.SeekStatus status = terms.seek(tr); + if (status == TermsEnum.SeekStatus.END) { // Rollover to the next field terms = null; @@ -1379,8 +1385,9 @@ // Advance to the next field currentField = fields.next(); if (currentField == null) { - if (Codec.DEBUG) + if (Codec.DEBUG) { System.out.println(" fields.next returned false"); + } done = true; return false; } @@ -1569,6 +1576,4 @@ return positions.hasPayload(); } } - - } Index: src/java/org/apache/lucene/index/TermPositions.java =================================================================== --- src/java/org/apache/lucene/index/TermPositions.java (revision 892408) +++ src/java/org/apache/lucene/index/TermPositions.java (working copy) @@ -28,7 +28,7 @@ * @see IndexReader#termPositions() * @deprecated Use {@link PositionsEnum} instead */ - +@Deprecated public interface TermPositions extends TermDocs { Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== 
--- src/java/org/apache/lucene/index/IndexReader.java (revision 892408) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -1171,17 +1171,28 @@ */ public abstract Collection getFieldNames(FieldOption fldOption); + // Only used by external subclasses of IndexReader; all + // internal classes should implement Bits more + // efficiently: private final class DeletedDocsBits implements Bits { public boolean get(int docID) { return isDeleted(docID); } } + /** + * Returns the {@link Bits} representing deleted docs. A + * set bit indicates the doc ID has been deleted. This + * method should return null when there are no deleted + * docs. */ + private Bits deletedDocsBits; public Bits getDeletedDocs() throws IOException { - return new DeletedDocsBits(); + if (deletedDocsBits == null) { + deletedDocsBits = new DeletedDocsBits(); + } + return deletedDocsBits; } - /** * Forcibly unlocks the index in the named directory. *

Index: src/java/org/apache/lucene/index/LegacyFields.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFields.java (revision 892408) +++ src/java/org/apache/lucene/index/LegacyFields.java (working copy) @@ -19,9 +19,9 @@ import java.io.IOException; -/** Implements new API (FieldsEnum/TermsEnum) on top of old - * API. Used only for IndexReader impls outside Lucene's - * core. */ +/** Implements flex API (FieldsEnum/TermsEnum) on top of + * non-flex API. Used only for IndexReader impls outside + * Lucene's core. */ class LegacyFields extends Fields { private final IndexReader r; Index: src/java/org/apache/lucene/index/Fields.java =================================================================== --- src/java/org/apache/lucene/index/Fields.java (revision 892408) +++ src/java/org/apache/lucene/index/Fields.java (working copy) @@ -19,23 +19,12 @@ import java.io.IOException; -// TODO: split out an "iterator" api from the terms(String -// field) API? - -// nocommit -- intended to be forward only? eg no "reset"? - /** Access to fields and terms * * NOTE: this API is experimental and will likely change */ -// TODO: someday expose public version of FieldInfos here public abstract class Fields { - // nocommit -- clarify if this is forwards only. should - // this be "skipTo"? - // nocommit -- clarify: when this returns false, what is - // its internal state? eg if i call field() after getting - // false back? 
/** Returns an iterator that will step through all fields * names */ public abstract FieldsEnum iterator() throws IOException; Index: src/java/org/apache/lucene/index/codecs/DocsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/DocsConsumer.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/DocsConsumer.java (working copy) @@ -69,7 +69,7 @@ if (docMap != null) { // map around deletions doc = docMap[startDoc]; - assert doc != -1: "postings enum returned deleted docID " + startDoc + " freq=" + docs.freq() + " df=" + df; + assert doc != -1: "docs enum returned deleted docID " + startDoc + " freq=" + docs.freq() + " df=" + df + " de=" + docs; } else { doc = startDoc; } Index: src/java/org/apache/lucene/index/codecs/TermsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/TermsConsumer.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/TermsConsumer.java (working copy) @@ -110,6 +110,7 @@ while(true) { TermMergeState state = pending[pendingCount++] = queue.pop(); + DocsEnum docsEnum = state.termsEnum.docs(mergeState.readers.get(state.readerIndex).getDeletedDocs()); if (docsEnum != null) { match[matchCount].docsEnum = docsEnum; Index: src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPositionsReader.java (working copy) @@ -112,9 +112,10 @@ @Override public PositionsEnum positions() throws IOException { - if (positions == null) + if (positions == null) { // Lazy init positions = new SegmentPositionsEnum(); + } return positions; } @@ -223,7 +224,7 @@ skipPosCount--; // NOTE: the old API actually allowed this... 
- assert skipPosCount >= 0: "next() was called too many times (more than FormatPostingsDocsEnum.freq() times)"; + assert skipPosCount >= 0: "next() was called too many times (more than FormatPostingsDocsEnum.freq() times) skipPosCount=" + skipPosCount; if (Codec.DEBUG) System.out.println(" proxFP=" + proxIn.getFilePointer() + " return pos=" + position); Index: src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/standard/StandardDocsReader.java (working copy) @@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.standard.StandardTermsDictReader.CacheEntry; /** Concrete class that reads the current doc/freq/skip - * postings format */ + * postings format. */ // nocommit -- should we switch "hasProx" higher up? and // create two separate docs readers, one that also reads @@ -336,6 +336,7 @@ if (Codec.DEBUG) { System.out.println(" result doc=" + doc); } + return doc; } @@ -406,8 +407,9 @@ // indexing, which means we pretend termFreq is // always 1 with that 1 occurrence having // position 0 - if (fakePositions == null) + if (fakePositions == null) { fakePositions = new FormatPostingsFakePositionsEnum(); + } return fakePositions; } else { // TODO: abstraction violation Index: src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/preflex/SegmentTermPositions.java (working copy) @@ -51,8 +51,8 @@ */ // nocommit -- public - public SegmentTermPositions(IndexInput freqStream, IndexInput proxStream, Bits skipDocs, TermInfosReader tis, FieldInfos fieldInfos) { - super(freqStream, skipDocs, tis, fieldInfos); + public 
SegmentTermPositions(IndexInput freqStream, IndexInput proxStream, TermInfosReader tis, FieldInfos fieldInfos) { + super(freqStream, tis, fieldInfos); this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time } Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (working copy) @@ -30,7 +30,12 @@ /** Codec that reads the pre-flex-indexing postings * format. It does not provide a writer because newly - * written segments should use StandardCodec. */ + * written segments should use StandardCodec. + * + * @deprecated This is only used to read indexes created + * before 3.1. + */ +@Deprecated public class PreFlexCodec extends Codec { /** Extension of terms file */ Index: src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy) @@ -31,9 +31,9 @@ /** @deprecated */ public class SegmentTermDocs implements TermDocs { //protected SegmentReader parent; - protected final Bits skipDocs; private final FieldInfos fieldInfos; private final TermInfosReader tis; + protected Bits skipDocs; protected IndexInput freqStream; protected int count; protected int df; @@ -66,9 +66,8 @@ */ // nocommit -- SR needs public - public SegmentTermDocs(IndexInput freqStream, Bits skipDocs, TermInfosReader tis, FieldInfos fieldInfos) { + public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) { this.freqStream = (IndexInput) freqStream.clone(); - this.skipDocs = skipDocs; this.tis = tis; this.fieldInfos = 
fieldInfos; skipInterval = tis.getSkipInterval(); @@ -80,6 +79,10 @@ seek(ti, term); } + public void setSkipDocs(Bits skipDocs) { + this.skipDocs = skipDocs; + } + public void seek(TermEnum termEnum) throws IOException { TermInfo ti; Term term; Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 892408) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -116,8 +116,8 @@ } @Override - public FieldsEnum iterator() { - return new Fields(); + public FieldsEnum iterator() throws IOException { + return new PreFlexFieldsEnum(); } @Override @@ -177,42 +177,32 @@ } } - private class Fields extends FieldsEnum { - Iterator it; + private class PreFlexFieldsEnum extends FieldsEnum { + final Iterator it; + private final PreTermsEnum termsEnum; + private int count; FieldInfo current; - private SegmentTermEnum lastTermEnum; - private int fieldCount; - public Fields() { + public PreFlexFieldsEnum() throws IOException { it = fields.values().iterator(); + termsEnum = new PreTermsEnum(); } @Override public String next() { if (it.hasNext()) { - fieldCount++; + count++; current = it.next(); return current.name; } else { return null; } } - + @Override public TermsEnum terms() throws IOException { - final PreTermsEnum terms; - if (lastTermEnum != null) { - // Re-use SegmentTermEnum to avoid seeking for - // linear scan (done by merging) - terms = new PreTermsEnum(current, lastTermEnum); - } else { - // If fieldCount is 1 then the terms enum can simply - // start at the start of the index (need not seek to - // the current field): - terms = new PreTermsEnum(current, fieldCount != 1); - lastTermEnum = terms.terms; - } - return terms; + termsEnum.reset(current, count == 1); + return termsEnum; } } @@ -223,9 +213,10 @@ } @Override - public TermsEnum iterator() throws IOException { - 
//System.out.println("pff.init create no context"); - return new PreTermsEnum(fieldInfo, true); + public TermsEnum iterator() throws IOException { + PreTermsEnum termsEnum = new PreTermsEnum(); + termsEnum.reset(fieldInfo, false); + return termsEnum; } @Override @@ -236,42 +227,39 @@ } private class PreTermsEnum extends TermsEnum { - private SegmentTermEnum terms; - private final FieldInfo fieldInfo; - private PreDocsEnum docsEnum; // nocommit -- unused + private SegmentTermEnum termEnum; + private FieldInfo fieldInfo; + private final PreDocsEnum docsEnum; private boolean skipNext; private TermRef current; private final TermRef scratchTermRef = new TermRef(); - // Pass needsSeek=false if the field is the very first - // field in the index -- this is used for linear scan of - // the index, eg when merging segments: - PreTermsEnum(FieldInfo fieldInfo, boolean needsSeek) throws IOException { - this.fieldInfo = fieldInfo; - if (!needsSeek) { - terms = getTermsDict().terms(); - } else { - terms = getTermsDict().terms(new Term(fieldInfo.name, "")); - skipNext = true; - } + public PreTermsEnum() throws IOException { + docsEnum = new PreDocsEnum(); } - PreTermsEnum(FieldInfo fieldInfo, SegmentTermEnum terms) throws IOException { + void reset(FieldInfo fieldInfo, boolean isFirstField) throws IOException { this.fieldInfo = fieldInfo; - if (terms.term() == null || terms.term().field() != fieldInfo.name) { - terms = getTermsDict().terms(new Term(fieldInfo.name, "")); + if (termEnum == null) { + // First time reset is called + if (isFirstField) { + termEnum = getTermsDict().terms(); + skipNext = false; + } else { + termEnum = getTermsDict().terms(new Term(fieldInfo.name, "")); + skipNext = true; + } } else { - // Carefully avoid seeking in the linear-scan case, - // because segment doesn't load/need the terms dict - // index during merging. 
If the terms is already on - // our field, it must be because it had seeked to - // exhaustion on the last field - this.terms = terms; + final Term t = termEnum.term(); + if (t != null && t.field() == fieldInfo.name) { + // No need to seek -- we have already advanced onto + // this field + } else { + assert t == null || !t.field().equals(fieldInfo.name); // make sure field name is interned + termEnum = getTermsDict().terms(new Term(fieldInfo.name, "")); + } + skipNext = true; } - skipNext = true; - if (Codec.DEBUG) { - System.out.println("pff.terms.init field=" + fieldInfo.name); - } } @Override @@ -295,8 +283,9 @@ if (Codec.DEBUG) { System.out.println("pff.seek term=" + term); } - terms = getTermsDict().terms(new Term(fieldInfo.name, term.toString())); - final Term t = terms.term(); + skipNext = false; + termEnum = getTermsDict().terms(new Term(fieldInfo.name, term.toString())); + final Term t = termEnum.term(); final TermRef tr; if (t != null) { @@ -321,14 +310,16 @@ @Override public TermRef next() throws IOException { if (skipNext) { - // nocommit -- is there a cleaner way? 
skipNext = false; - scratchTermRef.copy(terms.term().text()); - current = scratchTermRef; - return current; + if (termEnum.term() == null) { + return null; + } else { + scratchTermRef.copy(termEnum.term().text()); + return current = scratchTermRef; + } } - if (terms.next()) { - final Term t = terms.term(); + if (termEnum.next()) { + final Term t = termEnum.term(); if (Codec.DEBUG) { System.out.println("pff.next term=" + t); } @@ -340,15 +331,14 @@ current = scratchTermRef; return current; } else { + assert !t.field().equals(fieldInfo.name); // make sure field name is interned // Crossed into new field if (Codec.DEBUG) { System.out.println(" stop (new field " + t.field()); } - current = null; return null; } } else { - current = null; return null; } } @@ -360,36 +350,29 @@ @Override public int docFreq() { - return terms.docFreq(); + return termEnum.docFreq(); } @Override public DocsEnum docs(Bits skipDocs) throws IOException { - // nocommit -- reuse? - return new PreDocsEnum(skipDocs, terms); + docsEnum.reset(termEnum, skipDocs); + return docsEnum; } } private final class PreDocsEnum extends DocsEnum { - final private SegmentTermDocs docs; final private SegmentTermPositions pos; - private SegmentTermDocs current; final private PrePositionsEnum prePos; + private Bits skipDocs; - PreDocsEnum(Bits skipDocs, Term t) throws IOException { - current = docs = new SegmentTermDocs(freqStream, skipDocs, getTermsDict(), fieldInfos); - pos = new SegmentTermPositions(freqStream, proxStream, skipDocs, getTermsDict(), fieldInfos); + PreDocsEnum() throws IOException { + pos = new SegmentTermPositions(freqStream, proxStream, getTermsDict(), fieldInfos); prePos = new PrePositionsEnum(pos); - docs.seek(t); - pos.seek(t); } - PreDocsEnum(Bits skipDocs, SegmentTermEnum te) throws IOException { - current = docs = new SegmentTermDocs(freqStream, skipDocs, getTermsDict(), fieldInfos); - pos = new SegmentTermPositions(freqStream, proxStream, skipDocs, getTermsDict(), fieldInfos); - prePos = 
new PrePositionsEnum(pos); - docs.seek(te); - pos.seek(te); + public void reset(SegmentTermEnum termEnum, Bits skipDocs) throws IOException { + pos.setSkipDocs(skipDocs); + pos.seek(termEnum); } @Override @@ -397,8 +380,8 @@ if (Codec.DEBUG) { System.out.println("pff.docs.next"); } - if (current.next()) { - return current.doc(); + if (pos.next()) { + return pos.doc(); } else { return NO_MORE_DOCS; } @@ -406,8 +389,8 @@ @Override public int advance(int target) throws IOException { - if (current.skipTo(target)) { - return current.doc(); + if (pos.skipTo(target)) { + return pos.doc(); } else { return NO_MORE_DOCS; } @@ -415,31 +398,22 @@ @Override public int freq() { - return current.freq(); + return pos.freq(); } @Override public int docID() { - return current.doc(); + return pos.doc(); } @Override - public int read(int[] docIDs, int[] freqs) throws IOException { - if (current != docs) { - docs.skipTo(current.doc()); - current = docs; - } - return current.read(docIDs, freqs); - } - - @Override public PositionsEnum positions() throws IOException { - if (current != pos) { - pos.skipTo(docs.doc()); - current = pos; - } return prePos; } + + // NOTE: we don't override bulk-read (docs & freqs) API + // -- leave it to base class, because TermPositions + // can't do bulk read } private final class PrePositionsEnum extends PositionsEnum { Index: src/java/org/apache/lucene/index/TermDocs.java =================================================================== --- src/java/org/apache/lucene/index/TermDocs.java (revision 892408) +++ src/java/org/apache/lucene/index/TermDocs.java (working copy) @@ -30,6 +30,7 @@ @deprecated Use {@link DocsEnum} instead */ +@Deprecated public interface TermDocs extends Closeable { /** Sets this to the data for a term. * The enumeration is reset to the start of the data for this term. 
Index: contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (revision 892408) +++ contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (working copy) @@ -238,5 +238,29 @@ } }; } + + @Override + public TermDocs termDocs() throws IOException { + return new FilterTermDocs(in.termDocs()) { + + @Override + public boolean next() throws IOException { + boolean res; + while ((res = super.next())) { + if (!dels.get(doc())) { + break; + } + } + return res; + } + }; + } + + @Override + public TermDocs termDocs(Term term) throws IOException { + TermDocs termDocs = termDocs(); + termDocs.seek(term); + return termDocs; + } } }