Index: src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- src/test/org/apache/lucene/index/TestCodecs.java (revision 919739) +++ src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -511,7 +511,9 @@ DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null); final DocsEnum docsEnum; - if (postings != null) { + // nocommit -- need way to query codec if it + // "supports" positions + if (postings != DocsAndPositionsEnum.EMPTY) { docsEnum = postings; } else { docsEnum = docs; Index: src/test/org/apache/lucene/index/FlexTestUtil.java =================================================================== --- src/test/org/apache/lucene/index/FlexTestUtil.java (revision 919739) +++ src/test/org/apache/lucene/index/FlexTestUtil.java (working copy) @@ -73,8 +73,7 @@ public static void verifyFlexVsPreFlex(Random rand, IndexReader r) throws Exception { // First test on DirReader - // nocommit turn back on - // verifyFlexVsPreFlexSingle(rand, r); + verifyFlexVsPreFlexSingle(rand, r); // Then on each individual sub reader IndexReader[] subReaders = r.getSequentialSubReaders(); @@ -86,24 +85,18 @@ } // Then on a new MultiReader - // nocommit -- back on: - if (false) { - IndexReader m = new MultiReader(subReaders, false); - verifyFlexVsPreFlexSingle(rand, m); - m.close(); - } + IndexReader m = new MultiReader(subReaders, false); + verifyFlexVsPreFlexSingle(rand, m); + m.close(); // Then on a forced-external reader (forced flex to // emulate API on pre-flex API, which in turn is // emulating pre-flex on flex -- twisted, but, better // work): - // nocommit back on - if (false) { - verifyFlexVsPreFlexSingle(rand, new ForcedExternalReader(r)); - IndexReader m = new MultiReader(forcedSubReaders, false); - verifyFlexVsPreFlexSingle(rand, m); - m.close(); - } + verifyFlexVsPreFlexSingle(rand, new ForcedExternalReader(r)); + m = new MultiReader(forcedSubReaders, false); + verifyFlexVsPreFlexSingle(rand, m); + m.close(); } private static void verifyFlexVsPreFlexSingle(Random rand, IndexReader r) throws Exception { @@ -119,7 +112,7 @@ // straight enum of fields/terms/docs/positions TermEnum termEnum = r.terms(); - FieldsEnum fields = r.fields().iterator(); + FieldsEnum fields = MultiFields.getFields(r).iterator(); while(true) { final String field = fields.next(); if (field == null) { @@ -146,8 +139,8 @@ assertEquals(termEnum.docFreq(), terms.docFreq()); //allTerms.add(t); - postings = terms.docsAndPositions(r.getDeletedDocs(), postings); - docsEnum = terms.docs(r.getDeletedDocs(), docsEnum); + postings = terms.docsAndPositions(MultiFields.getDeletedDocs(r), postings); + docsEnum = terms.docs(MultiFields.getDeletedDocs(r), docsEnum); final DocsEnum docs; if (postings != null) { @@ -166,7 +159,6 @@ assertTrue(termPos.next()); assertEquals(termPos.doc(), doc); assertEquals(termPos.freq(), docs.freq()); - //System.out.println("TEST: doc=" + doc + " freq=" + docs.freq()); final int freq = docs.freq(); if (postings == null) { assertEquals(1, freq); @@ -175,7 +167,6 @@ } else { for(int i=0;i 1) { postingsEnum = new UnionDocsAndPositionsEnum(reader, terms); } else { - postingsEnum = reader.termPositionsEnum(reader.getDeletedDocs(), + postingsEnum = reader.termPositionsEnum(MultiFields.getDeletedDocs(reader), terms[0].field(), new BytesRef(terms[0].text())); } @@ -454,7 +455,7 @@ public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException { List docsEnums = new LinkedList(); - final Bits delDocs = 
indexReader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(indexReader); for (int i = 0; i < terms.length; i++) { DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs, terms[i].field(), Index: src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- src/java/org/apache/lucene/search/PrefixQuery.java (revision 919739) +++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.MultiFields; @@ -52,8 +51,7 @@ @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { if (prefix.text().length() == 0) { - final Terms terms = MultiFields.getTerms(reader, getField()); - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return MultiFields.getTerms(reader, getField()).iterator(); } return new PrefixTermsEnum(reader, prefix); } Index: src/java/org/apache/lucene/search/function/ValueSourceQuery.java =================================================================== --- src/java/org/apache/lucene/search/function/ValueSourceQuery.java (revision 919739) +++ src/java/org/apache/lucene/search/function/ValueSourceQuery.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; import org.apache.lucene.index.Term; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; @@ -137,7 +138,7 @@ qWeight = w.getValue(); // this is when/where the values are first created. vals = valSrc.getValues(reader); - delDocs = reader.getDeletedDocs(); + delDocs = MultiFields.getDeletedDocs(reader); maxDoc = reader.maxDoc(); } Index: src/java/org/apache/lucene/search/PhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/PhraseQuery.java (revision 919739) +++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Explanation.IDFExplanation; @@ -154,7 +155,7 @@ return null; DocsAndPositionsEnum[] postings = new DocsAndPositionsEnum[terms.size()]; - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); for (int i = 0; i < terms.size(); i++) { final Term t = terms.get(i); final BytesRef text = new BytesRef(t.text()); @@ -164,11 +165,18 @@ delDocs, t.field(), text); - if (postingsEnum == null) { + assert postingsEnum != null: "reader=" + reader; + + // nocommit -- need a way to directly ask if codec + // stores positions + // try to notify user when they are running + // PhraseQuery on a field that did not index + // positions. 
+ if (postingsEnum == DocsAndPositionsEnum.EMPTY) { if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), - text) != null) { + text) != DocsEnum.EMPTY) { // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")"); } else { Index: src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 919739) +++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -22,7 +22,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; @@ -143,8 +142,7 @@ if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) { // NOTE: debateably, the caller should never pass in a // multi reader... - final Terms terms = MultiFields.getTerms(reader, field); - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return MultiFields.getTerms(reader, field).iterator(); } return new TermRangeTermsEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); Index: src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermQuery.java (revision 919739) +++ src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -75,7 +75,7 @@ public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { // NOTE: debateably, the caller should never pass in a // multi reader... 
- DocsEnum docs = MultiFields.getTermDocsEnum(reader, reader.getDeletedDocs(), term.field(), new BytesRef(term.text())); + DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); if (docs == null) { return null; } @@ -118,7 +118,7 @@ Explanation tfExplanation = new Explanation(); int tf = 0; - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), new BytesRef(term.text())); + DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); if (docs != null) { int newDoc = docs.advance(doc); if (newDoc == doc) { Index: src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 919739) +++ src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -283,7 +283,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -337,7 +337,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -396,7 +396,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -468,7 +468,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -536,7 +536,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -606,7 +606,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -658,7 +658,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { final BytesRef term = termsEnum.next(); @@ -711,7 +711,7 @@ if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { final BytesRef term = termsEnum.next(); Index: src/java/org/apache/lucene/search/AutomatonQuery.java =================================================================== --- src/java/org/apache/lucene/search/AutomatonQuery.java (revision 919739) +++ src/java/org/apache/lucene/search/AutomatonQuery.java (working copy) @@ -21,7 +21,6 @@ import 
org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; @@ -79,9 +78,7 @@ // matches all possible strings if (BasicOperations.isTotal(automaton)) { - final Terms terms = MultiFields.getTerms(reader, getField()); - // nocommit -- should we just return null? singleton? - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return MultiFields.getTerms(reader, getField()).iterator(); } // matches a fixed string in singleton representation Index: src/java/org/apache/lucene/index/LegacyFieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFieldsEnum.java (revision 919739) +++ src/java/org/apache/lucene/index/LegacyFieldsEnum.java (working copy) @@ -197,11 +197,11 @@ this.term = term; td.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs()"); } return this; @@ -256,11 +256,11 @@ this.term = term; tp.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs() skipDocs=" + skipDocs + " MultiFields.getDeletedDocs=" + MultiFields.getDeletedDocs(r) + " r=" + r); } return this; Index: src/java/org/apache/lucene/index/FieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/FieldsEnum.java (revision 919739) +++ src/java/org/apache/lucene/index/FieldsEnum.java (working copy) @@ -28,7 +28,8 @@ public abstract class FieldsEnum { - public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + // TODO: maybe allow retrieving FieldInfo for current + // field, as optional method? private AttributeSource atts = null; @@ -36,25 +37,38 @@ * Returns the related attributes. */ public AttributeSource attributes() { - if (atts == null) atts = new AttributeSource(); + if (atts == null) { + atts = new AttributeSource(); + } return atts; } - // nocommit -- do we need seek? - // nocommit -- should this return FieldInfo? /** Increments the enumeration to the next field. The * returned field is always interned, so simple == * comparison is allowed. Returns null when there are no * more fields.*/ public abstract String next() throws IOException; - // nocommit should we add a field()? fieldInfo()? - // mirrors TermsEnum - /** Get {@link TermsEnum} for the current field. You - * should not call {@link #next()} until you're done - * using this {@link TermsEnum}. After {@link #next} - * returns null, this method should not be called. */ + * should not call {@link #next} until you're done using + * this {@link TermsEnum}. After {@link #next} returns + * null, this method should not be called. This method + * will not return null. 
*/ public abstract TermsEnum terms() throws IOException; + + public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + + /** Provides zero fields */ + public final static FieldsEnum EMPTY = new FieldsEnum() { + + @Override + public String next() { + return null; + } + + @Override + public TermsEnum terms() { + throw new IllegalStateException("this method should never be called"); + } + }; } - Index: src/java/org/apache/lucene/index/MultiTerms.java =================================================================== --- src/java/org/apache/lucene/index/MultiTerms.java (revision 919739) +++ src/java/org/apache/lucene/index/MultiTerms.java (working copy) @@ -1,6 +1,5 @@ package org.apache.lucene.index; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -72,7 +71,7 @@ if (termsEnums.size() > 0) { return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY)); } else { - return null; + return TermsEnum.EMPTY; } } Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 919739) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -90,13 +90,12 @@ private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false - // nocommit: unread field - private boolean flexPostings; // True if postings were written with new flex format private Codec codec; private Map diagnostics; + // nocommit -- who uses this one? public SegmentInfo(String name, int docCount, Directory dir, Codec codec) { this.name = name; this.docCount = docCount; @@ -110,17 +109,9 @@ docStoreIsCompoundFile = false; delCount = 0; hasProx = true; - flexPostings = true; this.codec = codec; } - // nocommit -- this ctor is only used by back-compat tests - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { - this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true, null); - SegmentWriteState state = new SegmentWriteState(null, dir, name, null, null, docCount, docCount, -1, Codecs.getDefault()); - codec = state.codec = Codecs.getDefault().getWriter(state); - } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile, int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) { @@ -627,11 +618,9 @@ if (useCompoundFile) { files.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { - final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) { addIfExists(files, IndexFileNames.segmentFileName(name, ext)); } - // nocommit -- only does ifExists on prx for standard codec codec.files(dir, this, files); } Index: src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- src/java/org/apache/lucene/index/ParallelReader.java (revision 919739) +++ src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -171,7 +171,7 @@ if (terms != null) { return terms.iterator(); } else { - return null; + return TermsEnum.EMPTY; } } } @@ -196,7 +196,7 @@ @Override public Bits getDeletedDocs() throws IOException { - return ((IndexReader) readers.get(0)).getDeletedDocs(); + 
return MultiFields.getDeletedDocs(readers.get(0)); } @Override Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 919739) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -57,7 +57,6 @@ private SegmentInfo si; private int readBufferSize; - boolean isPreFlex; CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal(); CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); @@ -835,7 +834,7 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -852,7 +851,7 @@ @Override public TermEnum terms(Term t) throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -901,7 +900,7 @@ @Override public TermDocs termDocs() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -921,7 +920,7 @@ @Override public TermPositions termPositions() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 919739) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -384,9 +384,9 @@ public Object clone() { SegmentInfos sis = (SegmentInfos) super.clone(); for(int i=0;i(userData); return sis; Index: src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- src/java/org/apache/lucene/index/CheckIndex.java (revision 919739) +++ src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -492,7 +492,7 @@ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); // Test the Term Index - segInfoStat.termIndexStatus = testTermIndex(info, reader); + segInfoStat.termIndexStatus = testTermIndex(reader); // Test Stored Fields segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf); @@ -575,7 +575,7 @@ /** * Test the term index. */ - private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) { + private Status.TermIndexStatus testTermIndex(SegmentReader reader) { final Status.TermIndexStatus status = new Status.TermIndexStatus(); final int maxDoc = reader.maxDoc(); @@ -612,7 +612,9 @@ postings = terms.docsAndPositions(delDocs, postings); final DocsEnum docs2; - if (postings != null) { + // nocommit -- need a way to ask codec if it + // supports positions + if (postings != DocsAndPositionsEnum.EMPTY) { docs2 = postings; } else { docs2 = docs; @@ -642,7 +644,9 @@ } int lastPos = -1; - if (postings != null) { + // nocommit -- need a way to ask codec if it + // supports positions + if (postings != DocsAndPositionsEnum.EMPTY) { for(int j=0;j threadEnums = new CloseableThreadLocal(); - /** Returns an iterator that will step through all terms */ + /** Returns an iterator that will step through all + * terms. 
This method will not return null.*/ public abstract TermsEnum iterator() throws IOException; /** Return the BytesRef Comparator used to sort terms - * provided by the iterator. NOTE: this may return null - * if there are no terms. This method may be invoked - * many times; it's best to cache a single instance & - * reuse it. */ + * provided by the iterator. This method will not return + * null. This method may be invoked many times; it's + * best to cache a single instance & reuse it. */ public abstract BytesRef.Comparator getComparator() throws IOException; /** Returns the number of documents containing the @@ -58,25 +56,25 @@ } // nocommit -- or maybe make a separate positions(...) method? - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsEnum for the specified term. This method will + * not return null. */ public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { return termsEnum.docs(skipDocs, reuse); } else { - return null; + return DocsEnum.EMPTY; } } - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsEnum for the specified term. This method will + * not return null. */ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { return termsEnum.docsAndPositions(skipDocs, reuse); } else { - return null; + return DocsAndPositionsEnum.EMPTY; } } @@ -97,4 +95,38 @@ protected void close() { threadEnums.close(); } + public final static Terms[] EMPTY_ARRAY = new Terms[0]; + + /** Provides zero terms */ + public final static Terms EMPTY = new Terms() { + @Override + public TermsEnum iterator() { + return TermsEnum.EMPTY; + } + + @Override + public BytesRef.Comparator getComparator() { + return BytesRef.getUTF8SortedAsUTF16Comparator(); + } + + @Override + public int docFreq(BytesRef text) { + return 0; + } + + @Override + public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) { + return DocsEnum.EMPTY; + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, DocsAndPositionsEnum reuse) { + return DocsAndPositionsEnum.EMPTY; + } + + @Override + public long getUniqueTermCount() { + return 0; + } + }; } Index: src/java/org/apache/lucene/index/DocsAndPositionsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (revision 919739) +++ src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (working copy) @@ -41,4 +41,47 @@ public final int read(int[] docs, int[] freqs) { throw new UnsupportedOperationException(); } + + /** Provides zero docs */ + public static final DocsAndPositionsEnum EMPTY = new DocsAndPositionsEnum() { + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public int freq() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int docID() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int nextPosition() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int getPayloadLength() 
{ + throw new IllegalStateException("this method should never be called"); + } + + @Override + public BytesRef getPayload() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public boolean hasPayload() { + throw new IllegalStateException("this method should never be called"); + } + }; } Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 919739) +++ src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -67,4 +67,33 @@ } return count; } + + /** Provides zero docs */ + public static final DocsEnum EMPTY = new DocsEnum() { + + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public int freq() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int docID() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int read(int[] docs, int[] freqs) { + return 0; + } + }; } Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 919739) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -30,7 +30,6 @@ import org.apache.lucene.index.DirectoryReader.MultiTermPositions; // deprecated import org.apache.lucene.search.Similarity; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close -import org.apache.lucene.util.MultiBits; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; @@ -46,7 +45,8 @@ private int maxDoc = 0; private int numDocs = -1; private boolean hasDeletions = false; - private MultiBits deletedDocs; + // nocommit remove: + //private MultiBits deletedDocs; //private Fields fields; /** @@ -76,7 +76,7 @@ this.subReaders = subReaders.clone(); starts = new int[subReaders.length + 1]; // build starts array decrefOnClose = new boolean[subReaders.length]; - Bits[] subs = new Bits[subReaders.length]; + //Bits[] subs = new Bits[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { starts[i] = maxDoc; @@ -92,7 +92,7 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; } - subs[i] = subReaders[i].getDeletedDocs(); + //subs[i] = subReaders[i].getDeletedDocs(); final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), @@ -101,11 +101,13 @@ } starts[subReaders.length] = maxDoc; + /* if (hasDeletions) { deletedDocs = new MultiBits(subs, starts); } else { deletedDocs = null; } + */ } @Override @@ -115,7 +117,7 @@ @Override public Fields fields() throws IOException { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } /** @@ -162,11 +164,7 @@ @Override public Bits getDeletedDocs() throws IOException { - if (subReaders.length == 1) { - return subReaders[0].getDeletedDocs(); - } else { - return deletedDocs; - } + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's 
usually better to work per segment instead)"); } /** Index: src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/DirectoryReader.java (revision 919739) +++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -39,7 +39,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.MultiBits; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close @@ -352,6 +351,7 @@ buffer.append('('); for(SegmentReader r : subReaders) { buffer.append(r); + buffer.append(' '); } buffer.append(')'); return buffer.toString(); @@ -360,7 +360,7 @@ private void initialize(SegmentReader[] subReaders) throws IOException { this.subReaders = subReaders; starts = new int[subReaders.length + 1]; // build starts array - Bits[] subs = new Bits[subReaders.length]; + //Bits[] subs = new Bits[subReaders.length]; final List subFields = new ArrayList(); final List fieldSlices = new ArrayList(); @@ -371,7 +371,7 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; - subs[i] = subReaders[i].getDeletedDocs(); + //subs[i] = subReaders[i].getDeletedDocs(); } final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i); @@ -385,19 +385,19 @@ } starts[subReaders.length] = maxDoc; + /* + // nocommit remove: if (hasDeletions) { deletedDocs = new MultiBits(subs, starts); } else { deletedDocs = null; } + */ } - private Bits deletedDocs; - @Override public Bits getDeletedDocs() { - // nocommit -- maybe not supported? - return deletedDocs; + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } @Override @@ -722,7 +722,6 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - //nocommit: investigate this opto if (subReaders.length == 1) { // Optimize single segment case: return subReaders[0].terms(); @@ -786,9 +785,9 @@ @Override public Fields fields() throws IOException { if (subReaders.length == 0) { - return null; + return Fields.EMPTY; } else { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } } Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 919739) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -175,14 +175,6 @@ // Basic files for (String ext : IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC) { - - // nocommit - /* - if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx()) - continue; - - */ - if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) && !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) files.add(IndexFileNames.segmentFileName(segment, ext)); @@ -571,12 +563,25 @@ final List fields = new ArrayList(); final List subReaders = new ArrayList(); final List slices = new ArrayList(); + final List bits = new ArrayList(); + final List bitsStarts = new ArrayList(); final int numReaders = readers.size(); for(int i=0;iNOTE: if 
this is a multi reader ({@link * #getSequentialSubReaders} is not null) then this @@ -918,12 +917,7 @@ * account deleted documents that have not yet been * merged away. */ public int docFreq(String field, BytesRef term) throws IOException { - final Terms terms = fields().terms(field); - if (terms != null) { - return terms.docFreq(term); - } else { - return 0; - } + return fields().terms(field).docFreq(term); } /** Returns an enumeration of all the documents which contain @@ -949,63 +943,25 @@ } public Terms terms(String field) throws IOException { - final Fields fields = fields(); - if (fields != null) { - return fields.terms(field); - } else { - return null; - } + return fields().terms(field); } /** Returns {@link DocsEnum} for the specified field & * term. This may return null, for example if either the * field or term does not exist. */ public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; - final Fields fields = fields(); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("ir.termDocsEnum field=" + field + " term=" + term + " terms=" + terms + " this=" + this); - } - final DocsEnum docs = terms.docs(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("ir.termDocsEnum field=" + field + " docs=" +docs); - } - return docs; - } - } - - return null; + return fields().terms(field).docs(skipDocs, term, null); } /** Returns {@link DocsAndPositionsEnum} for the specified * field & term. This may return null, for example if * either the field or term does not exist. */ public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; - - final Fields fields = fields(); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("ir.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms + " this=" + this); - } - final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("ir.termPositionsEnum field=" + field + " postings=" +postings); - } - return postings; - } - } - - return null; + return fields().terms(field).docsAndPositions(skipDocs, term, null); } /** Returns an unpositioned {@link TermDocs} enumerator. @@ -1230,12 +1186,14 @@ } } - /** - * Returns the {@link Bits} representing deleted docs. A - * set bit indicates the doc ID has been deleted. This - * method should return null when there are no deleted - * docs. */ private Bits deletedDocsBits; + + /** Returns the {@link Bits} representing deleted docs. A + * set bit indicates the doc ID has been deleted. This + * method should return null when there are no deleted + * docs. 
+ * + * @lucene.experimental */ public Bits getDeletedDocs() throws IOException { if (deletedDocsBits == null) { deletedDocsBits = new DeletedDocsBits(); @@ -1431,13 +1389,25 @@ private Fields fields; - /** lucene.experimental */ - public void storeFields(Fields fields) { + /** lucene.internal */ + void storeFields(Fields fields) { this.fields = fields; } - /** lucene.experimental */ - public Fields retrieveFields() { + /** lucene.internal */ + Fields retrieveFields() { return fields; } + + private Bits storedDelDocs; + + /** lucene.internal */ + void storeDelDocs(Bits delDocs) { + this.storedDelDocs = delDocs; + } + + /** lucene.internal */ + Bits retrieveDelDocs() { + return storedDelDocs; + } } Index: src/java/org/apache/lucene/index/TermsHashPerThread.java =================================================================== --- src/java/org/apache/lucene/index/TermsHashPerThread.java (revision 919739) +++ src/java/org/apache/lucene/index/TermsHashPerThread.java (working copy) @@ -36,6 +36,10 @@ final RawPostingList freePostings[] = new RawPostingList[256]; int freePostingsCount; + // Used when comparing postings via termRefComp, in TermsHashPerField + final BytesRef tr1 = new BytesRef(); + final BytesRef tr2 = new BytesRef(); + // Used by perField: final BytesRef utf8 = new BytesRef(10); Index: src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (revision 919739) +++ src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (working copy) @@ -95,9 +95,6 @@ public void abort() {} - // nocommit -- should be @ thread level not field - private final BytesRef flushTerm = new BytesRef(); - /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to @@ -109,6 +106,8 @@ final int numPostings = termsHashPerField.numPostings; + final BytesRef flushTerm = perThread.flushTerm; + assert numPostings >= 0; if (!doVectors || numPostings == 0) @@ -128,8 +127,8 @@ perThread.doc.addField(termsHashPerField.fieldInfo.number); - // nocommit -- should I sort by whatever terms dict is - // sorting by? + // TODO: we may want to make this sort in same order + // as Codec's terms dict? final RawPostingList[] postings = termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUTF16Comparator()); tvf.writeVInt(numPostings); Index: src/java/org/apache/lucene/index/Fields.java =================================================================== --- src/java/org/apache/lucene/index/Fields.java (revision 919739) +++ src/java/org/apache/lucene/index/Fields.java (working copy) @@ -19,18 +19,33 @@ import java.io.IOException; -/** Access to fields and terms +/** Flex API for access to fields and terms * @lucene.experimental */ public abstract class Fields { - public final static Fields[] EMPTY_ARRAY = new Fields[0]; - /** Returns an iterator that will step through all fields - * names */ + * names. This method will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field */ + /** Get the {@link Terms} for this field. This method + * will not return null. 
*/ public abstract Terms terms(String field) throws IOException; + + public final static Fields[] EMPTY_ARRAY = new Fields[0]; + + /** Provides zero fields */ + public final static Fields EMPTY = new Fields() { + + @Override + public FieldsEnum iterator() { + return FieldsEnum.EMPTY; + } + + @Override + public Terms terms(String field) { + return Terms.EMPTY; + } + }; } Index: src/java/org/apache/lucene/index/codecs/MergeState.java =================================================================== --- src/java/org/apache/lucene/index/codecs/MergeState.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/MergeState.java (working copy) @@ -34,6 +34,7 @@ public int[] delCounts; // Deletion count per reader public int[] docBase; // New docID base per reader public int mergedDocCount; // Total # merged docs + public Bits multiDeletedDocs; // Updated per field; Index: src/java/org/apache/lucene/index/codecs/TermsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/TermsConsumer.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/TermsConsumer.java (working copy) @@ -51,6 +51,7 @@ public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException { BytesRef term; + assert termsEnum != null; if (mergeState.fieldInfo.omitTermFreqAndPositions) { if (docsEnum == null) { @@ -61,14 +62,14 @@ MultiDocsEnum docsEnumIn = null; while((term = termsEnum.next()) != null) { - MultiDocsEnum docsEnumIn2 = (MultiDocsEnum) termsEnum.docs(mergeState.multiDeletedDocs, docsEnumIn); - if (docsEnumIn2 != null) { - docsEnumIn = docsEnumIn2; - docsEnum.reset(docsEnumIn); - final PostingsConsumer postingsConsumer = startTerm(term); - final int numDocs = postingsConsumer.merge(mergeState, docsEnum); - finishTerm(term, numDocs); - } + // nocommit -- dangerous? what if .EMPTY is + // returned here? + docsEnumIn = (MultiDocsEnum) termsEnum.docs(mergeState.multiDeletedDocs, docsEnumIn); + assert docsEnumIn != null; + docsEnum.reset(docsEnumIn); + final PostingsConsumer postingsConsumer = startTerm(term); + final int numDocs = postingsConsumer.merge(mergeState, docsEnum); + finishTerm(term, numDocs); } } else { if (postingsEnum == null) { @@ -77,14 +78,14 @@ postingsEnum.setMergeState(mergeState); MultiDocsAndPositionsEnum postingsEnumIn = null; while((term = termsEnum.next()) != null) { - MultiDocsAndPositionsEnum postingsEnumIn2 = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiDeletedDocs, postingsEnumIn); - if (postingsEnumIn2 != null) { - postingsEnumIn = postingsEnumIn2; - postingsEnum.reset(postingsEnumIn); - final PostingsConsumer postingsConsumer = startTerm(term); - final int numDocs = postingsConsumer.merge(mergeState, postingsEnum); - finishTerm(term, numDocs); - } + // nocommit -- dangerous? what if .EMPTY is + // returned here? 
+ postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiDeletedDocs, postingsEnumIn); + assert postingsEnumIn != null; + postingsEnum.reset(postingsEnumIn); + final PostingsConsumer postingsConsumer = startTerm(term); + final int numDocs = postingsConsumer.merge(mergeState, postingsEnum); + finishTerm(term, numDocs); } } } Index: src/java/org/apache/lucene/index/codecs/Codec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/Codec.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/Codec.java (working copy) @@ -59,6 +59,7 @@ // nocommit -- add a "required capabilities" here; this // way merging could say only "TERMS_LINEAR_SCAN" but // searching would say "TERMS_RANDOM_ACCESS"? + // nocommit -- make a SegmentReadState (symmetric)? public abstract FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException; /** Gathers files associated with this segment */ Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (working copy) @@ -205,7 +205,12 @@ if (Codec.DEBUG) { System.out.println("stdr.terms field=" + field + " found=" + fields.get(field)); } - return fields.get(field); + final Terms terms = fields.get(field); + if (terms == null) { + return Terms.EMPTY; + } else { + return terms; + } } // Iterates through all fields @@ -552,6 +557,7 @@ System.out.println("stdr.docs"); } DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + assert docsEnum != null; if (Codec.DEBUG) { docsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); } @@ -564,13 +570,12 @@ System.out.println("stdr.docsAndPositions omitTF=" + fieldInfo.omitTermFreqAndPositions); } if (fieldInfo.omitTermFreqAndPositions) { - return null; + return DocsAndPositionsEnum.EMPTY; } else { DocsAndPositionsEnum postingsEnum = postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); + assert postingsEnum != null; if (Codec.DEBUG) { - if (postingsEnum != null) { - postingsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); - } + postingsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); } if (Codec.DEBUG) { Codec.debug(" return enum=" + postingsEnum); Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (working copy) @@ -165,7 +165,8 @@ @Override public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { final SegmentDocsEnum docsEnum; - if (reuse == null) { + // nocommit messy + if (reuse == null || !(reuse instanceof SegmentDocsEnum)) { docsEnum = new SegmentDocsEnum(freqIn); } else { docsEnum = (SegmentDocsEnum) reuse; @@ -176,10 +177,10 @@ @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.omitTermFreqAndPositions) { - return 
null; + return DocsAndPositionsEnum.EMPTY; } final SegmentDocsAndPositionsEnum docsEnum; - if (reuse == null) { + if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) { docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn); } else { docsEnum = (SegmentDocsAndPositionsEnum) reuse; Index: src/java/org/apache/lucene/index/codecs/FieldsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (working copy) @@ -20,7 +20,6 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; -import org.apache.lucene.index.TermsEnum; import java.io.IOException; @@ -41,14 +40,12 @@ public void merge(MergeState mergeState, Fields fields) throws IOException { FieldsEnum fieldsEnum = fields.iterator(); + assert fieldsEnum != null; String field; while((field = fieldsEnum.next()) != null) { mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); - final TermsEnum termsEnum = fieldsEnum.terms(); - if (termsEnum != null) { - termsConsumer.merge(mergeState, termsEnum); - } + termsConsumer.merge(mergeState, fieldsEnum.terms()); } } } Index: src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy) @@ -144,8 +144,10 @@ doc += docCode >>> 1; // shift off low bit if ((docCode & 1) != 0) // if low bit is set freq = 1; // freq is one - else + else { freq = freqStream.readVInt(); // else read freq + assert freq != 1; + } } count++; Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 919739) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -124,7 +124,7 @@ if (fi != null) { return new PreTerms(fi); } else { - return null; + return Terms.EMPTY; } } @@ -427,6 +427,7 @@ } public DocsAndPositionsEnum reset(SegmentTermEnum termEnum, Bits skipDocs) throws IOException { + //new Throwable().printStackTrace(System.out); pos.setSkipDocs(skipDocs); pos.seek(termEnum); return this; Index: src/java/org/apache/lucene/index/MultiFields.java =================================================================== --- src/java/org/apache/lucene/index/MultiFields.java (revision 919739) +++ src/java/org/apache/lucene/index/MultiFields.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.MultiBits; /** * Exposes flex API, merged from flex API of sub-segments. 
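The MultiFields hunks that follow replace the old null-returning accessors with never-null versions backed by the EMPTY singletons, and move top-level deleted-docs access from IndexReader.getDeletedDocs to MultiFields.getDeletedDocs (built lazily and cached on the reader via storeDelDocs/retrieveDelDocs). A minimal caller-side sketch of the resulting pattern; the countDocs method and the field/term literals are illustrative only, and only APIs added elsewhere in this patch are assumed:

    import java.io.IOException;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;

    public class CountDocs {
      public static int countDocs(IndexReader reader, String field, String text) throws IOException {
        // Top-level deleted docs now come from MultiFields, not the reader;
        // this may be null when the reader has no deletions, which the enum
        // APIs accept as "skip nothing".
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        final DocsEnum docs = MultiFields.getTermDocsEnum(reader, delDocs, field, new BytesRef(text));
        // No null check needed: a missing field or term yields DocsEnum.EMPTY,
        // whose nextDoc() returns NO_MORE_DOCS immediately.
        int count = 0;
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          count++;
        }
        return count;
      }
    }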
@@ -45,8 +45,9 @@
 public final class MultiFields extends Fields {
   private final Fields[] subs;
   private final ReaderUtil.Slice[] subSlices;
-  private final Map terms = new HashMap();
+  private final Map terms = new HashMap();
 
+  /** This method will not return null.*/
   public static Fields getFields(IndexReader r) throws IOException {
     final IndexReader[] subs = r.getSequentialSubReaders();
     if (subs == null) {
@@ -61,10 +62,17 @@
       final List<Fields> fields = new ArrayList<Fields>();
       final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
 
-      ReaderUtil.gatherSubFields(null, fields, slices, r, 0);
+      new ReaderUtil.Gather(r) {
+        @Override
+        protected void add(int base, IndexReader r) throws IOException {
+          fields.add(r.fields());
+          slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
+        }
+      }.run();
+
       if (fields.size() == 0) {
-        return null;
+        return Fields.EMPTY;
       } else if (fields.size() == 1) {
         currentFields = fields.get(0);
       } else {
@@ -77,66 +85,64 @@
     }
   }
 
-  public static Terms getTerms(IndexReader r, String field) throws IOException {
-    final Fields fields = getFields(r);
-    if (fields != null) {
-      return fields.terms(field);
+  public static Bits getDeletedDocs(IndexReader r) throws IOException {
+    Bits result;
+    if (r.hasDeletions()) {
+
+      result = r.retrieveDelDocs();
+      if (result == null) {
+
+        final List<Bits> bits = new ArrayList<Bits>();
+        final List<Integer> starts = new ArrayList<Integer>();
+        final List<Bits> nulls = new ArrayList<Bits>();
+
+        final int maxDoc = new ReaderUtil.Gather(r) {
+          @Override
+          protected void add(int base, IndexReader r) throws IOException {
+            // record all delDocs, even if they are null
+            bits.add(r.getDeletedDocs());
+            starts.add(base);
+          }
+        }.run();
+        starts.add(maxDoc);
+
+        assert bits.size() > 0;
+        if (bits.size() == 1) {
+          // Only one actual sub reader -- optimize this case
+          result = bits.get(0);
+        } else {
+          result = new MultiBits(bits, starts);
+        }
+        r.storeDelDocs(result);
+      }
     } else {
-      return null;
+      result = null;
     }
+
+    return result;
   }
 
+  /** This method will not return null.*/
+  public static Terms getTerms(IndexReader r, String field) throws IOException {
+    return getFields(r).terms(field);
+  }
+
   /** Returns {@link DocsEnum} for the specified field &
-   *  term. This may return null, for example if either the
-   *  field or term does not exist. */
+   *  term. This method will not return null. */
   public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
-    assert field != null;
     assert term != null;
-    final Fields fields = getFields(r);
-    if (fields != null) {
-      final Terms terms = fields.terms(field);
-      if (terms != null) {
-        if (Codec.DEBUG) {
-          System.out.println("mf.termDocsEnum field=" + field + " term=" + term + " terms=" + terms);
-        }
-        final DocsEnum docs = terms.docs(skipDocs, term, null);
-        if (Codec.DEBUG) {
-          System.out.println("mf.termDocsEnum field=" + field + " docs=" +docs);
-        }
-        return docs;
-      }
-    }
-
-    return null;
+    return getFields(r).terms(field).docs(skipDocs, term, null);
   }
 
   /** Returns {@link DocsAndPositionsEnum} for the specified
-   *  field & term. This may return null, for example if
-   *  either the field or term does not exist. */
+   *  field & term. This method will not return null. */
   public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
     assert field != null;
     assert term != null;
-
-    final Fields fields = getFields(r);
-    if (fields != null) {
-      final Terms terms = fields.terms(field);
-      if (terms != null) {
-        if (Codec.DEBUG) {
-          System.out.println("mf.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms);
-        }
-        final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null);
-        if (Codec.DEBUG) {
-          System.out.println("mf.termPositionsEnum field=" + field + " postings=" +postings);
-        }
-        return postings;
-      }
-    }
-
-    return null;
+    return getFields(r).terms(field).docsAndPositions(skipDocs, term, null);
   }
 
-
   public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
     this.subs = subs;
     this.subSlices = subSlices;
@@ -148,14 +154,11 @@
       final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
       final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
       for(int i=0;i
Index: src/java/org/apache/lucene/util/ReaderUtil.java
===================================================================
--- src/java/org/apache/lucene/util/ReaderUtil.java	(revision 919739)
+++ src/java/org/apache/lucene/util/ReaderUtil.java	(working copy)
-  public static void gatherSubReaders(List<IndexReader> allSubReaders, IndexReader reader) {
-    IndexReader[] subReaders = reader.getSequentialSubReaders();
-    if (subReaders == null) {
-      // Add the reader itself, and do not recurse
-      allSubReaders.add(reader);
-    } else {
-      for (int i = 0; i < subReaders.length; i++) {
-        gatherSubReaders(allSubReaders, subReaders[i]);
-      }
-    }
+
+  public static void gatherSubReaders(final List<IndexReader> allSubReaders, IndexReader reader) {
+    try {
+      new Gather(reader) {
+        @Override
+        protected void add(int base, IndexReader r) {
+          allSubReaders.add(r);
+        }
+      }.run();
+    } catch (IOException ioe) {
+      // won't happen
+      throw new RuntimeException(ioe);
+    }
   }
 
-  public static int gatherSubFields(List<IndexReader> readers, List<Fields> fields, List<Slice> slices, IndexReader reader, int base) throws IOException {
-    IndexReader[] subReaders = reader.getSequentialSubReaders();
-    if (subReaders == null) {
-      // Add the reader's fields
-      if (readers != null) {
-        readers.add(reader);
+  public static abstract class Gather {
+    private final IndexReader topReader;
+
+    public Gather(IndexReader r) {
+      topReader = r;
+    }
+
+    public int run() throws IOException {
+      return run(0, topReader);
+    }
+
+    public int run(int docBase) throws IOException {
+      return run(docBase, topReader);
+    }
+
+    private int run(int base, IndexReader reader) throws IOException {
+      IndexReader[] subReaders = reader.getSequentialSubReaders();
+      if (subReaders == null) {
+        // atomic reader
+        add(base, reader);
+        base += reader.maxDoc();
+      } else {
+        // composite reader
+        for (int i = 0; i < subReaders.length; i++) {
+          base = run(base, subReaders[i]);
+        }
       }
-      fields.add(reader.fields());
-      slices.add(new Slice(base, reader.maxDoc(), fields.size()-1));
-      base += reader.maxDoc();
-    } else {
-      for (int i = 0; i < subReaders.length; i++) {
-        base = gatherSubFields(readers, fields, slices, subReaders[i], base);
-      }
+
+      return base;
     }
-    return base;
+
+    protected abstract void add(int base, IndexReader r) throws IOException;
   }
 
   /**
Index: src/java/org/apache/lucene/util/Bits.java
===================================================================
--- src/java/org/apache/lucene/util/Bits.java	(revision 919739)
+++ src/java/org/apache/lucene/util/Bits.java	(working copy)
@@ -17,7 +17,13 @@
  * limitations under the License.
  */
 
+/**
+ * @lucene.experimental
+ */
+
 public interface Bits {
   public boolean get(int index);
   public int length();
+
+  public static final Bits[] EMPTY_ARRAY = new Bits[0];
 }
Index: src/java/org/apache/lucene/util/MultiBits.java
===================================================================
--- src/java/org/apache/lucene/util/MultiBits.java	(revision 919739)
+++ src/java/org/apache/lucene/util/MultiBits.java	(working copy)
@@ -17,21 +17,30 @@
  * limitations under the License.
  */
 
+import java.util.List;
+
 /**
  * Concatenates multiple Bits together, on every lookup.
  *
  * <p>NOTE: This is very costly, as every lookup must
  * do a binary search to locate the right sub-reader.
+ *
+ * @lucene.experimental
  */
+
 public final class MultiBits implements Bits {
   private final Bits[] subs;
 
   // length is 1+subs.length (the last entry has the maxDoc):
   private final int[] starts;
 
-  public MultiBits(Bits[] subs, int[] starts) {
-    this.subs = subs;
-    this.starts = starts;
+  public MultiBits(List<Bits> bits, List<Integer> starts) {
+    assert starts.size() == 1+bits.size();
+    this.subs = bits.toArray(Bits.EMPTY_ARRAY);
+    this.starts = new int[starts.size()];
+    for(int i=0;i<this.starts.length;i++) {
+      this.starts[i] = starts.get(i);
+    }
   }
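MultiBits.get itself is cut off at the end of this excerpt; the javadoc's warning that "every lookup must do a binary search" corresponds to a lookup of roughly the following shape. This is a sketch of the idea only, not the committed implementation:

    // Inside MultiBits: subs[i] covers docIDs [starts[i], starts[i+1]).
    public boolean get(int doc) {
      int lo = 0;
      int hi = subs.length - 1;
      while (lo <= hi) {
        final int mid = (lo + hi) >>> 1;
        if (doc < starts[mid]) {
          hi = mid - 1;
        } else if (doc >= starts[mid + 1]) {
          lo = mid + 1;
        } else {
          // Found the owning sub-reader; a sub with no deletions recorded
          // a null Bits, which means "no doc is deleted" here.
          final Bits sub = subs[mid];
          return sub != null && sub.get(doc - starts[mid]);
        }
      }
      throw new ArrayIndexOutOfBoundsException("doc=" + doc);
    }

This per-lookup binary search is why the rest of the patch pushes callers toward per-segment access (see the UnsupportedOperationException messages added to MultiReader.getDeletedDocs and DirectoryReader.getDeletedDocs) and only builds a MultiBits when a caller explicitly asks MultiFields.getDeletedDocs for a top-level view.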