Index: src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- src/test/org/apache/lucene/TestExternalCodecs.java (revision 919975) +++ src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -523,17 +523,27 @@ fields = codec.fieldsConsumer(state); codecs.put(codec, fields); } - //System.out.println("field " + field.name + " -> codec " + codec); return fields.addField(field); } @Override public void close() throws IOException { Iterator it = codecs.values().iterator(); + IOException err = null; while(it.hasNext()) { - // nocommit -- catch exc and keep closing the rest? - it.next().close(); + try { + it.next().close(); + } catch (IOException ioe) { + // keep first IOException we hit but keep + // closing the rest + if (err == null) { + err = ioe; + } + } } + if (err != null) { + throw err; + } } } @@ -606,10 +616,21 @@ @Override public void close() throws IOException { Iterator it = codecs.values().iterator(); + IOException err = null; while(it.hasNext()) { - // nocommit -- catch exc and keep closing the rest? - it.next().close(); + try { + it.next().close(); + } catch (IOException ioe) { + // keep first IOException we hit but keep + // closing the rest + if (err == null) { + err = ioe; + } + } } + if (err != null) { + throw err; + } } @Override Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 919975) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -4341,13 +4341,6 @@ new IndexWriter(dir, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED).close(); assertTrue(dir.fileExists("myrandomfile")); - - // Make sure this does not copy myrandomfile: - // nocommit -- Directory.copy now copies all files -- - // how to fix? 
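The revised close() above follows the standard "keep the first IOException, but keep closing the rest" idiom, so one failing consumer cannot leak the remaining ones. A minimal standalone sketch of the same pattern (hypothetical helper, not part of this patch):

    import java.io.Closeable;
    import java.io.IOException;
    import java.util.List;

    final class CloseAll {
      // Close every resource; rethrow only the first IOException hit.
      static void closeAll(List<? extends Closeable> resources) throws IOException {
        IOException err = null;
        for (Closeable c : resources) {
          try {
            c.close();
          } catch (IOException ioe) {
            if (err == null) {
              err = ioe; // remember the first failure
            }
          }
        }
        if (err != null) {
          throw err; // reported only after everything was closed
        }
      }
    }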
- //Directory dir2 = new RAMDirectory(dir); - //assertTrue(!dir2.fileExists("myrandomfile")); - } finally { dir.close(); _TestUtil.rmDir(indexDir); Index: src/test/org/apache/lucene/index/TestOmitTf.java =================================================================== --- src/test/org/apache/lucene/index/TestOmitTf.java (revision 919975) +++ src/test/org/apache/lucene/index/TestOmitTf.java (working copy) @@ -80,7 +80,7 @@ // keep things constant d = new Document(); - // Reverese + // Reverse f1.setOmitTermFreqAndPositions(true); d.add(f1); @@ -90,8 +90,9 @@ Random rnd = newRandom(); writer.addDocument(d); + FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); + // force merge - FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); writer.optimize(); // flush writer.close(); Index: src/test/org/apache/lucene/index/FlexTestUtil.java =================================================================== --- src/test/org/apache/lucene/index/FlexTestUtil.java (revision 919975) +++ src/test/org/apache/lucene/index/FlexTestUtil.java (working copy) @@ -73,8 +73,7 @@ public static void verifyFlexVsPreFlex(Random rand, IndexReader r) throws Exception { // First test on DirReader - // nocommit turn back on - // verifyFlexVsPreFlexSingle(rand, r); + verifyFlexVsPreFlexSingle(rand, r); // Then on each individual sub reader IndexReader[] subReaders = r.getSequentialSubReaders(); @@ -86,24 +85,18 @@ } // Then on a new MultiReader - // nocommit -- back on: - if (false) { - IndexReader m = new MultiReader(subReaders, false); - verifyFlexVsPreFlexSingle(rand, m); - m.close(); - } + IndexReader m = new MultiReader(subReaders, false); + verifyFlexVsPreFlexSingle(rand, m); + m.close(); // Then on a forced-external reader (forced flex to // emulate API on pre-flex API, which in turn is // emulating pre-flex on flex -- twisted, but, better // work): - // nocommit back on - if (false) { - verifyFlexVsPreFlexSingle(rand, new ForcedExternalReader(r)); - IndexReader m = new MultiReader(forcedSubReaders, false); - verifyFlexVsPreFlexSingle(rand, m); - m.close(); - } + verifyFlexVsPreFlexSingle(rand, new ForcedExternalReader(r)); + m = new MultiReader(forcedSubReaders, false); + verifyFlexVsPreFlexSingle(rand, m); + m.close(); } private static void verifyFlexVsPreFlexSingle(Random rand, IndexReader r) throws Exception { @@ -119,9 +112,13 @@ // straight enum of fields/terms/docs/positions TermEnum termEnum = r.terms(); - FieldsEnum fields = r.fields().iterator(); + final Fields fields = MultiFields.getFields(r); + if (fields == null) { + return; + } + FieldsEnum fieldsEnum = fields.iterator(); while(true) { - final String field = fields.next(); + final String field = fieldsEnum.next(); if (field == null) { boolean result = termEnum.next(); if (result) { @@ -130,7 +127,7 @@ assertFalse(result); break; } - TermsEnum terms = fields.terms(); + TermsEnum terms = fieldsEnum.terms(); DocsAndPositionsEnum postings = null; DocsEnum docsEnum = null; final TermPositions termPos = r.termPositions(); @@ -146,8 +143,8 @@ assertEquals(termEnum.docFreq(), terms.docFreq()); //allTerms.add(t); - postings = terms.docsAndPositions(r.getDeletedDocs(), postings); - docsEnum = terms.docs(r.getDeletedDocs(), docsEnum); + postings = terms.docsAndPositions(MultiFields.getDeletedDocs(r), postings); + docsEnum = terms.docs(MultiFields.getDeletedDocs(r), docsEnum); final DocsEnum docs; if (postings != null) { @@ -166,16 +163,18 @@ assertTrue(termPos.next()); assertEquals(termPos.doc(), doc); assertEquals(termPos.freq(), docs.freq()); - 
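The FlexTestUtil change above switches to MultiFields.getFields(r), which returns null for a reader that has no postings at all, so callers must guard. A sketch of the resulting iteration pattern, using only accessors introduced by this patch:

    // Walk every field and term of a reader via the flex API.
    Fields fields = MultiFields.getFields(reader);
    if (fields != null) {                       // null means: no fields at all
      FieldsEnum fieldsEnum = fields.iterator();
      String field;
      while ((field = fieldsEnum.next()) != null) {
        TermsEnum terms = fieldsEnum.terms();   // never null per this patch
        BytesRef term;
        while ((term = terms.next()) != null) {
          // consume field/term here
        }
      }
    }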
//System.out.println("TEST: doc=" + doc + " freq=" + docs.freq()); final int freq = docs.freq(); if (postings == null) { assertEquals(1, freq); - assertEquals(0, termPos.nextPosition()); + // Old API did not always do this, + // specifically in the MultiTermPositions + // case when some segs omit positions and + // some don't + //assertEquals(0, termPos.nextPosition()); assertEquals(false, termPos.isPayloadAvailable()); } else { for(int i=0;i 1) { postingsEnum = new UnionDocsAndPositionsEnum(reader, terms); } else { - postingsEnum = reader.termPositionsEnum(reader.getDeletedDocs(), + postingsEnum = reader.termPositionsEnum(MultiFields.getDeletedDocs(reader), terms[0].field(), new BytesRef(terms[0].text())); } @@ -454,7 +455,7 @@ public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException { List docsEnums = new LinkedList(); - final Bits delDocs = indexReader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(indexReader); for (int i = 0; i < terms.length; i++) { DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs, terms[i].field(), Index: src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- src/java/org/apache/lucene/search/PrefixQuery.java (revision 919975) +++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -20,9 +20,9 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; @@ -52,6 +52,7 @@ @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { if (prefix.text().length() == 0) { + // no prefix -- match all terms for this field: final Terms terms = MultiFields.getTerms(reader, getField()); return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; } Index: src/java/org/apache/lucene/search/function/ValueSourceQuery.java =================================================================== --- src/java/org/apache/lucene/search/function/ValueSourceQuery.java (revision 919975) +++ src/java/org/apache/lucene/search/function/ValueSourceQuery.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; import org.apache.lucene.index.Term; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; @@ -137,7 +138,7 @@ qWeight = w.getValue(); // this is when/where the values are first created. 
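Call sites above are moving from reader.getDeletedDocs() to MultiFields.getDeletedDocs(reader), which hands back a merged Bits view that is null when no documents are deleted. A hedged sketch of a consuming loop (the field and term values are illustrative only):

    Bits delDocs = MultiFields.getDeletedDocs(reader); // null if no deletions
    DocsEnum docs = MultiFields.getTermDocsEnum(reader, delDocs,
                                                "body", new BytesRef("lucene"));
    if (docs != null) {                 // null if the field/term is absent
      int doc;
      while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        // the enum already applied delDocs; doc is a live document
      }
    }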
vals = valSrc.getValues(reader); - delDocs = reader.getDeletedDocs(); + delDocs = MultiFields.getDeletedDocs(reader); maxDoc = reader.maxDoc(); } Index: src/java/org/apache/lucene/search/PhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/PhraseQuery.java (revision 919975) +++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Explanation.IDFExplanation; @@ -154,21 +155,18 @@ return null; DocsAndPositionsEnum[] postings = new DocsAndPositionsEnum[terms.size()]; - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); for (int i = 0; i < terms.size(); i++) { final Term t = terms.get(i); final BytesRef text = new BytesRef(t.text()); - // NOTE: debateably, the caller should never pass in a - // multi reader... DocsAndPositionsEnum postingsEnum = MultiFields.getTermPositionsEnum(reader, delDocs, t.field(), text); + // PhraseQuery on a field that did not index + // positions. if (postingsEnum == null) { - if (MultiFields.getTermDocsEnum(reader, - delDocs, - t.field(), - text) != null) { + if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), text) != null) { // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")"); } else { Index: src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 919975) +++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -144,7 +144,7 @@ // NOTE: debateably, the caller should never pass in a // multi reader... final Terms terms = MultiFields.getTerms(reader, field); - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return (terms != null) ? terms.iterator() : null; } return new TermRangeTermsEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); Index: src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermQuery.java (revision 919975) +++ src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -75,7 +75,7 @@ public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { // NOTE: debateably, the caller should never pass in a // multi reader... 
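The PhraseQuery hunk above separates two null cases: a null DocsAndPositionsEnum combined with a non-null DocsEnum means the term exists but the field was indexed with omitTermFreqAndPositions, which is a caller error rather than an empty result. The check in isolation (sketch, same API as the patch):

    DocsAndPositionsEnum postings =
        MultiFields.getTermPositionsEnum(reader, delDocs, field, text);
    if (postings == null) {
      if (MultiFields.getTermDocsEnum(reader, delDocs, field, text) != null) {
        // docs exist but positions were omitted at index time
        throw new IllegalStateException("field \"" + field
            + "\" was indexed without positions; cannot run PhraseQuery");
      }
      // otherwise the term simply does not exist: no matches
    }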
- DocsEnum docs = MultiFields.getTermDocsEnum(reader, reader.getDeletedDocs(), term.field(), new BytesRef(term.text())); + DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); if (docs == null) { return null; } @@ -118,7 +118,7 @@ Explanation tfExplanation = new Explanation(); int tf = 0; - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), new BytesRef(term.text())); + DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); if (docs != null) { int newDoc = docs.advance(doc); if (newDoc == doc) { Index: src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 919975) +++ src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -283,7 +283,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -337,7 +337,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -396,7 +396,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -468,7 +468,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -536,7 +536,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -606,7 +606,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -658,7 +658,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { final BytesRef term = termsEnum.next(); @@ -711,7 +711,7 @@ if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { final BytesRef term = termsEnum.next(); Index: src/java/org/apache/lucene/search/AutomatonQuery.java =================================================================== --- src/java/org/apache/lucene/search/AutomatonQuery.java (revision 919975) +++ src/java/org/apache/lucene/search/AutomatonQuery.java (working copy) @@ -21,7 +21,6 @@ import 
org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; @@ -79,9 +78,7 @@ // matches all possible strings if (BasicOperations.isTotal(automaton)) { - final Terms terms = MultiFields.getTerms(reader, getField()); - // nocommit -- should we just return null? singleton? - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return MultiFields.getTerms(reader, getField()).iterator(); } // matches a fixed string in singleton representation Index: src/java/org/apache/lucene/index/LegacyFieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFieldsEnum.java (revision 919975) +++ src/java/org/apache/lucene/index/LegacyFieldsEnum.java (working copy) @@ -197,11 +197,11 @@ this.term = term; td.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs()"); } return this; @@ -256,11 +256,11 @@ this.term = term; tp.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs() skipDocs=" + skipDocs + " MultiFields.getDeletedDocs=" + MultiFields.getDeletedDocs(r) + " r=" + r); } return this; Index: src/java/org/apache/lucene/index/FieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/FieldsEnum.java (revision 919975) +++ src/java/org/apache/lucene/index/FieldsEnum.java (working copy) @@ -28,7 +28,8 @@ public abstract class FieldsEnum { - public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + // TODO: maybe allow retrieving FieldInfo for current + // field, as optional method? private AttributeSource atts = null; @@ -36,25 +37,38 @@ * Returns the related attributes. */ public AttributeSource attributes() { - if (atts == null) atts = new AttributeSource(); + if (atts == null) { + atts = new AttributeSource(); + } return atts; } - // nocommit -- do we need seek? - // nocommit -- should this return FieldInfo? /** Increments the enumeration to the next field. The * returned field is always interned, so simple == * comparison is allowed. Returns null when there are no * more fields.*/ public abstract String next() throws IOException; - // nocommit should we add a field()? fieldInfo()? - // mirrors TermsEnum - /** Get {@link TermsEnum} for the current field. You - * should not call {@link #next()} until you're done - * using this {@link TermsEnum}. After {@link #next} - * returns null, this method should not be called. */ + * should not call {@link #next} until you're done using + * this {@link TermsEnum}. After {@link #next} returns + * null this method should not be called. This method + * will not return null. 
*/ public abstract TermsEnum terms() throws IOException; + + public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + + /** Provides zero fields */ + public final static FieldsEnum EMPTY = new FieldsEnum() { + + @Override + public String next() { + return null; + } + + @Override + public TermsEnum terms() { + throw new IllegalStateException("this method should never be called"); + } + }; } - Index: src/java/org/apache/lucene/index/MultiTerms.java =================================================================== --- src/java/org/apache/lucene/index/MultiTerms.java (revision 919975) +++ src/java/org/apache/lucene/index/MultiTerms.java (working copy) @@ -1,6 +1,5 @@ package org.apache.lucene.index; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -72,7 +71,7 @@ if (termsEnums.size() > 0) { return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY)); } else { - return null; + return TermsEnum.EMPTY; } } Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 919975) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -90,13 +90,12 @@ private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false - // nocommit: unread field - private boolean flexPostings; // True if postings were written with new flex format private Codec codec; private Map diagnostics; + // nocommit -- who uses this one? public SegmentInfo(String name, int docCount, Directory dir, Codec codec) { this.name = name; this.docCount = docCount; @@ -110,17 +109,9 @@ docStoreIsCompoundFile = false; delCount = 0; hasProx = true; - flexPostings = true; this.codec = codec; } - // nocommit -- this ctor is only used by back-compat tests - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { - this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true, null); - SegmentWriteState state = new SegmentWriteState(null, dir, name, null, null, docCount, docCount, -1, Codecs.getDefault()); - codec = state.codec = Codecs.getDefault().getWriter(state); - } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile, int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) { @@ -627,11 +618,9 @@ if (useCompoundFile) { files.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { - final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) { addIfExists(files, IndexFileNames.segmentFileName(name, ext)); } - // nocommit -- only does ifExists on prx for standard codec codec.files(dir, this, files); } Index: src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- src/java/org/apache/lucene/index/ParallelReader.java (revision 919975) +++ src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -171,7 +171,7 @@ if (terms != null) { return terms.iterator(); } else { - return null; + return TermsEnum.EMPTY; } } } @@ -196,7 +196,7 @@ @Override public Bits getDeletedDocs() throws IOException { - return ((IndexReader) readers.get(0)).getDeletedDocs(); + 
return MultiFields.getDeletedDocs(readers.get(0)); } @Override Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 919975) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -57,7 +57,6 @@ private SegmentInfo si; private int readBufferSize; - boolean isPreFlex; CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal(); CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); @@ -835,7 +834,7 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -852,7 +851,7 @@ @Override public TermEnum terms(Term t) throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -901,7 +900,7 @@ @Override public TermDocs termDocs() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -921,7 +920,7 @@ @Override public TermPositions termPositions() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 919975) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -384,9 +384,9 @@ public Object clone() { SegmentInfos sis = (SegmentInfos) super.clone(); for(int i=0;i(userData); return sis; Index: src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- src/java/org/apache/lucene/index/CheckIndex.java (revision 919975) +++ src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -492,7 +492,7 @@ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); // Test the Term Index - segInfoStat.termIndexStatus = testTermIndex(info, reader); + segInfoStat.termIndexStatus = testTermIndex(reader); // Test Stored Fields segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf); @@ -575,7 +575,7 @@ /** * Test the term index. 
*/ - private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) { + private Status.TermIndexStatus testTermIndex(SegmentReader reader) { final Status.TermIndexStatus status = new Status.TermIndexStatus(); final int maxDoc = reader.maxDoc(); Index: src/java/org/apache/lucene/index/Terms.java =================================================================== --- src/java/org/apache/lucene/index/Terms.java (revision 919975) +++ src/java/org/apache/lucene/index/Terms.java (working copy) @@ -29,17 +29,16 @@ public abstract class Terms { - public final static Terms[] EMPTY_ARRAY = new Terms[0]; - // Privately cache a TermsEnum per-thread for looking up // docFreq and getting a private DocsEnum private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>(); - /** Returns an iterator that will step through all terms */ + /** Returns an iterator that will step through all + * terms. This method will not return null.*/ public abstract TermsEnum iterator() throws IOException; /** Return the BytesRef Comparator used to sort terms - * provided by the iterator. NOTE: this may return null + * provided by the iterator. This method may return null * if there are no terms. This method may be invoked * many times; it's best to cache a single instance & * reuse it. */ @@ -57,9 +56,8 @@ } } - // nocommit -- or maybe make a separate positions(...) method? - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsEnum for the specified term. This method may + * return null if the term does not exist. */ public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { @@ -69,8 +67,9 @@ } } - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsAndPositionsEnum for the specified term. This + * method may return null if the term does not exist, or if + * positions were not indexed. 
*/ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { @@ -97,4 +96,40 @@ protected void close() { threadEnums.close(); } + public final static Terms[] EMPTY_ARRAY = new Terms[0]; + + /** Provides zero terms */ + /* + public final static Terms EMPTY = new Terms() { + @Override + public TermsEnum iterator() { + return TermsEnum.EMPTY; + } + + @Override + public BytesRef.Comparator getComparator() { + return BytesRef.getUTF8SortedAsUTF16Comparator(); + } + + @Override + public int docFreq(BytesRef text) { + return 0; + } + + @Override + public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) { + return DocsEnum.EMPTY; + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, DocsAndPositionsEnum reuse) { + return DocsAndPositionsEnum.EMPTY; + } + + @Override + public long getUniqueTermCount() { + return 0; + } + }; + */ } Index: src/java/org/apache/lucene/index/DocsAndPositionsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (revision 919975) +++ src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (working copy) @@ -41,4 +41,49 @@ public final int read(int[] docs, int[] freqs) { throw new UnsupportedOperationException(); } + + /** Provides zero docs */ + /* + public static final DocsAndPositionsEnum EMPTY = new DocsAndPositionsEnum() { + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public int freq() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int docID() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int nextPosition() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int getPayloadLength() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public BytesRef getPayload() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public boolean hasPayload() { + throw new IllegalStateException("this method should never be called"); + } + }; + */ } Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 919975) +++ src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -44,11 +44,10 @@ return atts; } - // nocommit -- state in API that doc/freq are undefined - // (defined?) after this? // nocommit -- fix this API so that intblock codecs are // able to return their own int arrays, to save a copy... IntsRef? - /** Bulk read: returns number of docs read. + /** Bulk read: returns number of docs read. After this is + * called, {@link #doc} and {@link #freq} are undefined. * *
<p>
NOTE: the default impl simply delegates to {@link * #nextDoc}, but subclasses may do this more @@ -67,4 +66,35 @@ } return count; } + + /** Provides zero docs */ + /* + public static final DocsEnum EMPTY = new DocsEnum() { + + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public int freq() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int docID() { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public int read(int[] docs, int[] freqs) { + return 0; + } + }; + */ } Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 919975) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -30,7 +30,6 @@ import org.apache.lucene.index.DirectoryReader.MultiTermPositions; // deprecated import org.apache.lucene.search.Similarity; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close -import org.apache.lucene.util.MultiBits; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; @@ -46,8 +45,6 @@ private int maxDoc = 0; private int numDocs = -1; private boolean hasDeletions = false; - private MultiBits deletedDocs; - //private Fields fields; /** *
<p>
Construct a MultiReader aggregating the named set of (sub)readers. @@ -76,7 +73,6 @@ this.subReaders = subReaders.clone(); starts = new int[subReaders.length + 1]; // build starts array decrefOnClose = new boolean[subReaders.length]; - Bits[] subs = new Bits[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { starts[i] = maxDoc; @@ -92,7 +88,6 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; } - subs[i] = subReaders[i].getDeletedDocs(); final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), @@ -101,11 +96,6 @@ } starts[subReaders.length] = maxDoc; - if (hasDeletions) { - deletedDocs = new MultiBits(subs, starts); - } else { - deletedDocs = null; - } } @Override @@ -115,7 +105,7 @@ @Override public Fields fields() throws IOException { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } /** @@ -162,11 +152,7 @@ @Override public Bits getDeletedDocs() throws IOException { - if (subReaders.length == 1) { - return subReaders[0].getDeletedDocs(); - } else { - return deletedDocs; - } + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } /** Index: src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/DirectoryReader.java (revision 919975) +++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -39,7 +39,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.MultiBits; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close @@ -352,6 +351,7 @@ buffer.append('('); for(SegmentReader r : subReaders) { buffer.append(r); + buffer.append(' '); } buffer.append(')'); return buffer.toString(); @@ -360,7 +360,6 @@ private void initialize(SegmentReader[] subReaders) throws IOException { this.subReaders = subReaders; starts = new int[subReaders.length + 1]; // build starts array - Bits[] subs = new Bits[subReaders.length]; final List subFields = new ArrayList(); final List fieldSlices = new ArrayList(); @@ -371,7 +370,6 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; - subs[i] = subReaders[i].getDeletedDocs(); } final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i); @@ -384,20 +382,11 @@ } } starts[subReaders.length] = maxDoc; - - if (hasDeletions) { - deletedDocs = new MultiBits(subs, starts); - } else { - deletedDocs = null; - } } - private Bits deletedDocs; - @Override public Bits getDeletedDocs() { - // nocommit -- maybe not supported? 
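Since MultiReader and DirectoryReader now throw from getDeletedDocs(), callers either work per segment (the preferred form) or explicitly ask MultiFields for the merged view. Sketch:

    // Preferred: consume deletions segment by segment.
    for (IndexReader sub : reader.getSequentialSubReaders()) {
      Bits subDels = sub.getDeletedDocs(); // null if this segment has none
      // ... iterate this segment's postings against subDels ...
    }

    // Fallback when a top-level view is genuinely needed:
    Bits topDels = MultiFields.getDeletedDocs(reader);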
- return deletedDocs; + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } @Override @@ -722,7 +711,6 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - //nocommit: investigate this opto if (subReaders.length == 1) { // Optimize single segment case: return subReaders[0].terms(); @@ -785,11 +773,7 @@ @Override public Fields fields() throws IOException { - if (subReaders.length == 0) { - return null; - } else { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); - } + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } @Override Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 919975) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -175,14 +175,6 @@ // Basic files for (String ext : IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC) { - - // nocommit - /* - if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx()) - continue; - - */ - if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) && !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) files.add(IndexFileNames.segmentFileName(segment, ext)); @@ -571,12 +563,25 @@ final List fields = new ArrayList(); final List subReaders = new ArrayList(); final List slices = new ArrayList(); + final List bits = new ArrayList(); + final List bitsStarts = new ArrayList(); final int numReaders = readers.size(); for(int i=0;i files) {} - static final class PostingList extends RawPostingList { int docFreq; // # times this term occurs in the current doc int lastDocID; // Last docID where this term occurred Index: src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/TermsEnum.java (revision 919975) +++ src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -74,7 +74,7 @@ /** Returns current term. Do not call this before calling * next() for the first time, after next() returns null - * or seek returns {@link SeekStatus#END}.*/ + * or after seek returns {@link SeekStatus#END}.*/ public abstract BytesRef term() throws IOException; /** Returns ordinal position for current term. This is an @@ -91,23 +91,25 @@ * {@link SeekStatus#END}.*/ public abstract int docFreq(); - // nocommit -- clarify if this may return null - // nocommit -- maybe require up front boolean doPositions? - // nocommit -- or maybe make a separate positions(...) method? /** Get {@link DocsEnum} for the current term. Do not - * call this before calling next() for the first time, - * after next() returns null or seek returns {@link - * SeekStatus#END}. + * call this before calling {@link #next} or {@link + * #seek} for the first time. This method will not + * return null. * * @param skipDocs set bits are documents that should not * be returned * @param reuse pass a prior DocsEnum for possible reuse */ public abstract DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException; + /** Get {@link DocsAndPositionsEnum} for the current term. + * Do not call this before calling {@link #next} or + * {@link #seek} for the first time. 
This method will + * only return null if positions were not indexed into + * the postings by this codec. */ public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; /** Return the {@link BytesRef} Comparator used to sort - * terms provided by the iterator. NOTE: this may return + * terms provided by the iterator. This may return * null if there are no terms. Callers may invoke this * method many times, so it's best to cache a single * instance & reuse it. */ @@ -128,7 +130,9 @@ public SeekStatus seek(long ord) { return SeekStatus.END; } @Override - public BytesRef term() { return null; } + public BytesRef term() { + throw new IllegalStateException("this method should never be called"); + } @Override public BytesRef.Comparator getComparator() { @@ -137,21 +141,29 @@ } @Override - public int docFreq() { return -1; } + public int docFreq() { + throw new IllegalStateException("this method should never be called"); + } @Override - public long ord() { return -1; } + public long ord() { + throw new IllegalStateException("this method should never be called"); + } @Override - public DocsEnum docs(Bits bits, DocsEnum reuse) { return null; } + public DocsEnum docs(Bits bits, DocsEnum reuse) { + throw new IllegalStateException("this method should never be called"); + } @Override public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) { - return null; + throw new IllegalStateException("this method should never be called"); } @Override - public BytesRef next() { return null; } + public BytesRef next() { + return null; + } @Override // make it synchronized here, to prevent double lazy init public synchronized AttributeSource attributes() { Index: src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 919975) +++ src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -35,8 +35,8 @@ public final class MultiTermsEnum extends TermsEnum { private final TermMergeQueue queue; - private final TermsEnumWithSlice[] subs; - private final TermsEnumWithSlice[] currentSubs; + private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader) + private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field private final TermsEnumWithSlice[] top; private final MultiDocsEnum.EnumWithSlice[] subDocs; private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions; @@ -93,7 +93,7 @@ /** The terms array must be newly created TermsEnum, ie * {@link TermsEnum#next} has not yet been called. */ - public MultiTermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException { + public TermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException { assert termsEnumsIndex.length <= top.length; numSubs = 0; numTop = 0; @@ -128,7 +128,7 @@ } if (queue.size() == 0) { - return null; + return TermsEnum.EMPTY; } else { return this; } @@ -141,12 +141,12 @@ for(int i=0;iNOTE: if this is a multi reader ({@link * #getSequentialSubReaders} is not null) then this @@ -914,16 +914,20 @@ public abstract int docFreq(Term t) throws IOException; /** Returns the number of documents containing the term - * t. This method does not take into - * account deleted documents that have not yet been - * merged away. */ + * t. This method returns 0 if the term or + * field does not exists. 
This method does not take into + * account deleted documents that have not yet been merged + * away. */ public int docFreq(String field, BytesRef term) throws IOException { - final Terms terms = fields().terms(field); - if (terms != null) { - return terms.docFreq(term); - } else { + final Fields fields = fields(); + if (fields == null) { return 0; } + final Terms terms = fields.terms(field); + if (terms == null) { + return 0; + } + return terms.docFreq(term); } /** Returns an enumeration of all the documents which contain @@ -948,64 +952,50 @@ return termDocs; } + /** This may return null if the field does not exist.*/ public Terms terms(String field) throws IOException { final Fields fields = fields(); - if (fields != null) { - return fields.terms(field); - } else { + if (fields == null) { return null; } + return fields.terms(field); } /** Returns {@link DocsEnum} for the specified field & - * term. This may return null, for example if either the - * field or term does not exist. */ + * term. This may return null, if either the field or + * term does not exist. */ public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; final Fields fields = fields(); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("ir.termDocsEnum field=" + field + " term=" + term + " terms=" + terms + " this=" + this); - } - final DocsEnum docs = terms.docs(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("ir.termDocsEnum field=" + field + " docs=" +docs); - } - return docs; - } + if (fields == null) { + return null; } - - return null; + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docs(skipDocs, term, null); + } else { + return null; + } } /** Returns {@link DocsAndPositionsEnum} for the specified - * field & term. This may return null, for example if - * either the field or term does not exist. */ + * field & term. This may return null, if either the + * field or term does not exist, or, positions were not + * stored for this term. */ public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; - final Fields fields = fields(); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("ir.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms + " this=" + this); - } - final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("ir.termPositionsEnum field=" + field + " postings=" +postings); - } - return postings; - } + if (fields == null) { + return null; } - - return null; + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, null); + } else { + return null; + } } /** Returns an unpositioned {@link TermDocs} enumerator. @@ -1230,12 +1220,14 @@ } } - /** - * Returns the {@link Bits} representing deleted docs. A - * set bit indicates the doc ID has been deleted. This - * method should return null when there are no deleted - * docs. */ private Bits deletedDocsBits; + + /** Returns the {@link Bits} representing deleted docs. A + * set bit indicates the doc ID has been deleted. This + * method should return null when there are no deleted + * docs. 
+ * + * @lucene.experimental */ public Bits getDeletedDocs() throws IOException { if (deletedDocsBits == null) { deletedDocsBits = new DeletedDocsBits(); @@ -1408,13 +1400,17 @@ * its unique term count. */ public long getUniqueTermCount() throws IOException { long numTerms = 0; - FieldsEnum it = fields().iterator(); + final Fields fields = fields(); + if (fields == null) { + return 0; + } + FieldsEnum it = fields.iterator(); while(true) { String field = it.next(); if (field == null) { break; } - numTerms += fields().terms(field).getUniqueTermCount(); + numTerms += fields.terms(field).getUniqueTermCount(); } return numTerms; } @@ -1431,13 +1427,25 @@ private Fields fields; - /** lucene.experimental */ - public void storeFields(Fields fields) { + /** lucene.internal */ + void storeFields(Fields fields) { this.fields = fields; } - /** lucene.experimental */ - public Fields retrieveFields() { + /** lucene.internal */ + Fields retrieveFields() { return fields; } + + private Bits storedDelDocs; + + /** lucene.internal */ + void storeDelDocs(Bits delDocs) { + this.storedDelDocs = delDocs; + } + + /** lucene.internal */ + Bits retrieveDelDocs() { + return storedDelDocs; + } } Index: src/java/org/apache/lucene/index/TermsHashPerThread.java =================================================================== --- src/java/org/apache/lucene/index/TermsHashPerThread.java (revision 919975) +++ src/java/org/apache/lucene/index/TermsHashPerThread.java (working copy) @@ -36,6 +36,10 @@ final RawPostingList freePostings[] = new RawPostingList[256]; int freePostingsCount; + // Used when comparing postings via termRefComp, in TermsHashPerField + final BytesRef tr1 = new BytesRef(); + final BytesRef tr2 = new BytesRef(); + // Used by perField: final BytesRef utf8 = new BytesRef(10); Index: src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 919975) +++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -1016,8 +1016,7 @@ // by re-using the same TermsEnum and seeking only // forwards if (term.field() != currentField) { - // nocommit -- once we sync up branch again, add - // assert that this field is always > last one + assert currentField == null || currentField.compareTo(term.field()) < 0; currentField = term.field(); Terms terms = fields.terms(currentField); if (terms != null) { @@ -1030,8 +1029,10 @@ if (termsEnum == null) { continue; } + assert checkDeleteTerm(term); termRef.copy(term.text()); + if (termsEnum.seek(termRef) == TermsEnum.SeekStatus.FOUND) { DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); Index: src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (revision 919975) +++ src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (working copy) @@ -95,9 +95,6 @@ public void abort() {} - // nocommit -- should be @ thread level not field - private final BytesRef flushTerm = new BytesRef(); - /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to @@ -109,6 +106,8 @@ final int numPostings = termsHashPerField.numPostings; + final BytesRef flushTerm = perThread.flushTerm; + assert numPostings >= 0; if (!doVectors || numPostings == 0) @@ -128,8 +127,8 @@ 
perThread.doc.addField(termsHashPerField.fieldInfo.number); - // nocommit -- should I sort by whatever terms dict is - // sorting by? + // TODO: we may want to make this sort in same order + // as Codec's terms dict? final RawPostingList[] postings = termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUTF16Comparator()); tvf.writeVInt(numPostings); Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 919975) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -614,10 +614,7 @@ // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: - // nocommit: old api - sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor, null); - // nocommit -- if info is from external dir DO NOT - // cache it! + sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor, codecs); readerMap.put(info, sr); } else { if (doOpenStores) { @@ -978,7 +975,6 @@ * false or if there is any other low-level * IO error */ - // nocommit -- need IW.Config!! public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit, Codecs codecs) throws CorruptIndexException, LockObtainFailedException, IOException { init(d, a, create, deletionPolicy, mfl.getLimit(), indexingChain, commit, codecs); @@ -3867,10 +3863,7 @@ } } - //nocommit: is this supposed to be here or not? - //merge.info.setHasProx(merger.hasProx()); - // mxx - // System.out.println(Thread.currentThread().getName() + ": finish setHasProx=" + merger.hasProx() + " seg=" + merge.info.name); + merge.info.setHasProx(merger.hasProx()); segmentInfos.subList(start, start + merge.segments.size()).clear(); assert !segmentInfos.contains(merge.info); @@ -4375,8 +4368,7 @@ } // This was a private clone and we had the // only reference - // nocommit -- why commented out? - // assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount(); + assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount(); } } } else { @@ -4388,8 +4380,7 @@ if (merge.readersClone[i] != null) { merge.readersClone[i].close(); // This was a private clone and we had the only reference - // nocommit -- why commented out? - //assert merge.readersClone[i].getRefCount() == 0; + assert merge.readersClone[i].getRefCount() == 0; } } } Index: src/java/org/apache/lucene/index/Fields.java =================================================================== --- src/java/org/apache/lucene/index/Fields.java (revision 919975) +++ src/java/org/apache/lucene/index/Fields.java (working copy) @@ -19,18 +19,35 @@ import java.io.IOException; -/** Access to fields and terms +/** Flex API for access to fields and terms * @lucene.experimental */ public abstract class Fields { - public final static Fields[] EMPTY_ARRAY = new Fields[0]; - /** Returns an iterator that will step through all fields - * names */ + * names. This will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field */ + /** Get the {@link Terms} for this field. This may return + * null if the field does not exist. 
*/ public abstract Terms terms(String field) throws IOException; + + public final static Fields[] EMPTY_ARRAY = new Fields[0]; + + /** Provides zero fields */ + /* + public final static Fields EMPTY = new Fields() { + + @Override + public FieldsEnum iterator() { + return FieldsEnum.EMPTY; + } + + @Override + public Terms terms(String field) { + return Terms.EMPTY; + } + }; + */ } Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (working copy) @@ -53,7 +53,7 @@ // Starts a new term FieldInfo fieldInfo; - // nocommit + // nocommit debugging String desc; /** @lucene.experimental */ Index: src/java/org/apache/lucene/index/codecs/TermsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/TermsConsumer.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/TermsConsumer.java (working copy) @@ -51,6 +51,7 @@ public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException { BytesRef term; + assert termsEnum != null; if (mergeState.fieldInfo.omitTermFreqAndPositions) { if (docsEnum == null) { @@ -61,9 +62,8 @@ MultiDocsEnum docsEnumIn = null; while((term = termsEnum.next()) != null) { - MultiDocsEnum docsEnumIn2 = (MultiDocsEnum) termsEnum.docs(mergeState.multiDeletedDocs, docsEnumIn); - if (docsEnumIn2 != null) { - docsEnumIn = docsEnumIn2; + docsEnumIn = (MultiDocsEnum) termsEnum.docs(mergeState.multiDeletedDocs, docsEnumIn); + if (docsEnumIn != null) { docsEnum.reset(docsEnumIn); final PostingsConsumer postingsConsumer = startTerm(term); final int numDocs = postingsConsumer.merge(mergeState, docsEnum); @@ -77,9 +77,8 @@ postingsEnum.setMergeState(mergeState); MultiDocsAndPositionsEnum postingsEnumIn = null; while((term = termsEnum.next()) != null) { - MultiDocsAndPositionsEnum postingsEnumIn2 = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiDeletedDocs, postingsEnumIn); - if (postingsEnumIn2 != null) { - postingsEnumIn = postingsEnumIn2; + postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiDeletedDocs, postingsEnumIn); + if (postingsEnumIn != null) { postingsEnum.reset(postingsEnumIn); final PostingsConsumer postingsConsumer = startTerm(term); final int numDocs = postingsConsumer.merge(mergeState, postingsEnum); Index: src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (working copy) @@ -39,7 +39,7 @@ // TODO: -- can we simplify this? 
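The TermsConsumer.merge rewrite above leans on the enum-reuse contract: the enum returned by docs()/docsAndPositions() may be the very instance passed in as reuse, re-seeked to the new term. The general pattern looks like this (sketch):

    // Pass the previous enum back in; the codec may recycle it.
    DocsEnum docs = null;
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      docs = termsEnum.docs(delDocs, docs); // may return the same object
      int doc;
      while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        // consume this term's postings
      }
    }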
public abstract static class Index { - // nocommit + // nocommit -- debugging public String desc; public abstract void read(IndexInput indexIn, boolean absolute) throws IOException; @@ -75,7 +75,7 @@ return bulkResult; } - // nocommit + // nocommit -- debugging public abstract String descFilePointer() throws IOException; } } Index: src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (working copy) @@ -39,7 +39,7 @@ public abstract static class Index { - // nocommit + // nocommit -- debugging public String desc; /** Internally records the current location */ @@ -59,6 +59,6 @@ public abstract void close() throws IOException; - // nocommit + // nocommit -- debugging public abstract String descFilePointer() throws IOException; } Index: src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy) @@ -352,7 +352,7 @@ } } - // nocommit + // nocommit -- debugging if (Codec.DEBUG) { System.out.println(" return doc=" + doc); } @@ -594,7 +594,7 @@ } } - // nocommit + // nocommit -- debugging if (Codec.DEBUG) { System.out.println(" return doc=" + doc); } Index: src/java/org/apache/lucene/index/codecs/Codec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/Codec.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/Codec.java (working copy) @@ -59,6 +59,7 @@ // nocommit -- add a "required capabilities" here; this // way merging could say only "TERMS_LINEAR_SCAN" but // searching would say "TERMS_RANDOM_ACCESS"? + // nocommit -- make a SegmentReadState (symmetric)? 
public abstract FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException; /** Gathers files associated with this segment */ Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (working copy) @@ -552,6 +552,7 @@ System.out.println("stdr.docs"); } DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + assert docsEnum != null; if (Codec.DEBUG) { docsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); } @@ -567,14 +568,12 @@ return null; } else { DocsAndPositionsEnum postingsEnum = postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); - if (Codec.DEBUG) { - if (postingsEnum != null) { + if (postingsEnum != null) { + if (Codec.DEBUG) { postingsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); + Codec.debug(" return enum=" + postingsEnum); } } - if (Codec.DEBUG) { - Codec.debug(" return enum=" + postingsEnum); - } return postingsEnum; } } Index: src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java (working copy) @@ -37,8 +37,7 @@ private long[] lastSkipProxPointer; private IndexOutput freqOutput; - // nocommit -- private again - public IndexOutput proxOutput; + private IndexOutput proxOutput; private int curDoc; private boolean curStorePayloads; @@ -57,16 +56,6 @@ lastSkipProxPointer = new long[numberOfSkipLevels]; } - // nocommit -- made public - //public void setFreqOutput(IndexOutput freqOutput) { - //this.freqOutput = freqOutput; - //} - - // nocommit -- made public - //public void setProxOutput(IndexOutput proxOutput) { - //this.proxOutput = proxOutput; - //} - /** * Sets the values for the current skip data. */ Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java (revision 919975) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java (working copy) @@ -140,9 +140,6 @@ if ((++df % skipInterval) == 0) { skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength); skipListWriter.bufferSkip(df); - if (Codec.DEBUG) { - System.out.println(" bufferSkip lastDocID=" + lastDocID + " df=" + df + " freqFP=" + freqOut.getFilePointer() + " proxFP=" + skipListWriter.proxOutput.getFilePointer()); - } } assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs; @@ -212,7 +209,8 @@ /** Called when we are done adding docs to this term */ @Override public void finishTerm(int docCount, boolean isIndexTerm) throws IOException { - // nocommit -- wasteful we are counting this in two places? + // TODO: wasteful we are counting this (counting # docs + // for this term) in two places? 
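For context on the skip-list calls earlier in this hunk: every skipInterval'th document of a term buffers one skip entry so readers can advance() without scanning. A hedged sketch of that cadence (docsForTerm, skipWriter, and the last* fields are hypothetical stand-ins):

    int df = 0;
    for (int doc : docsForTerm) {
      if ((++df % skipInterval) == 0) {
        // record where the previous doc ended in the freq/prox streams
        skipWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
        skipWriter.bufferSkip(df);
      }
      lastDocID = doc;
      // ... then write this doc's delta/freq to the freq stream ...
    }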
     assert docCount == df;
     // mxx
     if (Codec.DEBUG) {
@@ -251,7 +249,7 @@
     lastDocID = 0;
     df = 0;

-    // nocommit
+    // nocommit -- debugging
     count = 0;
   }
Index: src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (revision 919975)
+++ src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (working copy)
@@ -20,7 +20,6 @@
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.TermsEnum;

 import java.io.IOException;

@@ -41,14 +40,12 @@
   public void merge(MergeState mergeState, Fields fields) throws IOException {

     FieldsEnum fieldsEnum = fields.iterator();
+    assert fieldsEnum != null;
     String field;
     while((field = fieldsEnum.next()) != null) {
       mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
       final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
-      final TermsEnum termsEnum = fieldsEnum.terms();
-      if (termsEnum != null) {
-        termsConsumer.merge(mergeState, termsEnum);
-      }
+      termsConsumer.merge(mergeState, fieldsEnum.terms());
     }
   }
 }
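The merge path above now assumes Fields.iterator() and FieldsEnum.terms() never return null. For illustration, a hypothetical delegating FieldsConsumer (not part of this patch; the class name is made up) that piggybacks on merge() calling addField() exactly once per merged field:

  // Hypothetical sketch, not part of this patch: a delegating
  // FieldsConsumer that counts the fields merge() pushes through it,
  // without touching the postings themselves.
  class CountingFieldsConsumer extends FieldsConsumer {
    private final FieldsConsumer delegate;
    int fieldCount;

    CountingFieldsConsumer(FieldsConsumer delegate) {
      this.delegate = delegate;
    }

    @Override
    public TermsConsumer addField(FieldInfo field) throws IOException {
      fieldCount++;                      // merge() calls this once per field
      return delegate.addField(field);
    }

    @Override
    public void close() throws IOException {
      delegate.close();
    }
  }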
Index: src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 919975)
+++ src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy)
@@ -144,8 +144,10 @@
       doc += docCode >>> 1;            // shift off low bit
       if ((docCode & 1) != 0)          // if low bit is set
         freq = 1;                      // freq is one
-      else
+      else {
         freq = freqStream.readVInt();  // else read freq
+        assert freq != 1;
+      }
     }

     count++;
Index: src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (revision 919975)
+++ src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (working copy)
@@ -206,9 +206,6 @@
       }
     }

-    // nocommit -- make sure these optimizations survive
-    // into flex
-
     // optimize sequential access: first try scanning cached enum w/o seeking
     SegmentTermEnum enumerator = resources.termEnum;
     if (enumerator.term() != null                 // term is at or past current
Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 919975)
+++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy)
@@ -348,8 +348,6 @@

    @Override
    public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
-      // nocommit -- must assert that skipDocs "matches" the
-      // underlying deletedDocs?
      if (reuse != null) {
        return ((PreDocsEnum) reuse).reset(termEnum, skipDocs);
      } else {
@@ -359,8 +357,6 @@

    @Override
    public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
-      // nocommit -- must assert that skipDocs "matches" the
-      // underlying deletedDocs?
      if (reuse != null) {
        return ((PreDocsAndPositionsEnum) reuse).reset(termEnum, skipDocs);
      } else {
Index: src/java/org/apache/lucene/index/MultiFields.java
===================================================================
--- src/java/org/apache/lucene/index/MultiFields.java (revision 919975)
+++ src/java/org/apache/lucene/index/MultiFields.java (working copy)
@@ -25,7 +25,7 @@
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.util.MultiBits;

 /**
  * Exposes flex API, merged from flex API of sub-segments.
@@ -45,13 +45,24 @@
 public final class MultiFields extends Fields {
   private final Fields[] subs;
   private final ReaderUtil.Slice[] subSlices;
-  private final Map terms = new HashMap();
+  private final Map terms = new HashMap();

+  /** Returns a single {@link Fields} instance for this
+   *  reader, merging fields/terms/docs/positions on the
+   *  fly.  This method may return null if the reader has
+   *  no fields.
+   *
+   *  NOTE: this is a slow way to access postings.
+   *  It's better to get the sub-readers (using {@link
+   *  ReaderUtil#Gather}) and iterate through them
+   *  yourself. */
   public static Fields getFields(IndexReader r) throws IOException {
     final IndexReader[] subs = r.getSequentialSubReaders();
     if (subs == null) {
       // already an atomic reader
       return r.fields();
+    } else if (subs.length == 0) {
+      // no fields
+      return null;
     } else if (subs.length == 1) {
       return getFields(subs[0]);
     } else {
@@ -61,8 +72,15 @@
       final List fields = new ArrayList();
       final List slices = new ArrayList();

-      ReaderUtil.gatherSubFields(null, fields, slices, r, 0);
+      new ReaderUtil.Gather(r) {
+        @Override
+        protected void add(int base, IndexReader r) throws IOException {
+          fields.add(r.fields());
+          slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
+        }
+      }.run();
+
       if (fields.size() == 0) {
         return null;
       } else if (fields.size() == 1) {
@@ -77,66 +95,81 @@
     }
   }

+  public static Bits getDeletedDocs(IndexReader r) throws IOException {
+    Bits result;
+    if (r.hasDeletions()) {
+
+      result = r.retrieveDelDocs();
+      if (result == null) {
+
+        final List bits = new ArrayList();
+        final List starts = new ArrayList();
+        final List nulls = new ArrayList();
+
+        final int maxDoc = new ReaderUtil.Gather(r) {
+          @Override
+          protected void add(int base, IndexReader r) throws IOException {
+            // record all delDocs, even if they are null
+            bits.add(r.getDeletedDocs());
+            starts.add(base);
+          }
+        }.run();
+        starts.add(maxDoc);
+
+        assert bits.size() > 0;
+        if (bits.size() == 1) {
+          // Only one actual sub reader -- optimize this case
+          result = bits.get(0);
+        } else {
+          result = new MultiBits(bits, starts);
+        }
+        r.storeDelDocs(result);
+      }
+    } else {
+      result = null;
+    }

+    return result;
+  }
+
+  /** This method may return null if the field does not exist.*/
   public static Terms getTerms(IndexReader r, String field) throws IOException {
     final Fields fields = getFields(r);
-    if (fields != null) {
+    if (fields == null) {
+      return null;
+    } else {
       return fields.terms(field);
-    } else {
-      return null;
     }
   }

   /** Returns {@link DocsEnum} for the specified field &
-   *  term. This may return null, for example if either the
-   *  field or term does not exist. */
+   *  term. This may return null if the term does not
+   *  exist. */
   public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
-
     assert field != null;
     assert term != null;
-    final Fields fields = getFields(r);
-    if (fields != null) {
-      final Terms terms = fields.terms(field);
-      if (terms != null) {
-        if (Codec.DEBUG) {
-          System.out.println("mf.termDocsEnum field=" + field + " term=" + term + " terms=" + terms);
-        }
-        final DocsEnum docs = terms.docs(skipDocs, term, null);
-        if (Codec.DEBUG) {
-          System.out.println("mf.termDocsEnum field=" + field + " docs=" +docs);
-        }
-        return docs;
-      }
+    final Terms terms = getTerms(r, field);
+    if (terms != null) {
+      return terms.docs(skipDocs, term, null);
+    } else {
+      return null;
     }
-
-    return null;
   }

   /** Returns {@link DocsAndPositionsEnum} for the specified
-   *  field & term. This may return null, for example if
-   *  either the field or term does not exist. */
+   *  field & term. This may return null if the term does
+   *  not exist or positions were not indexed. */
   public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException {
     assert field != null;
     assert term != null;
-
-    final Fields fields = getFields(r);
-    if (fields != null) {
-      final Terms terms = fields.terms(field);
-      if (terms != null) {
-        if (Codec.DEBUG) {
-          System.out.println("mf.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms);
-        }
-        final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null);
-        if (Codec.DEBUG) {
-          System.out.println("mf.termPositionsEnum field=" + field + " postings=" +postings);
-        }
-        return postings;
-      }
+    final Terms terms = getTerms(r, field);
+    if (terms != null) {
+      return terms.docsAndPositions(skipDocs, term, null);
+    } else {
+      return null;
     }
-
-    return null;
   }

-
   public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
     this.subs = subs;
     this.subSlices = subSlices;
@@ -148,14 +181,11 @@
     final List fieldsEnums = new ArrayList();
     final List fieldsSlices = new ArrayList();
     for(int i=0;i<subs.length;i++) {
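A hypothetical usage sketch of the new MultiFields entry points above (caller code, not part of this patch; the field name and term are made up). It works on both atomic and composite readers, since getFields() merges the per-segment flex APIs on the fly:

  // Hypothetical caller code: total occurrences of a term across all
  // live (non-deleted) documents of any reader.
  static int totalFreq(IndexReader r, String field, BytesRef text) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) {          // reader has no fields
      return 0;
    }
    Terms terms = fields.terms(field);
    if (terms == null) {           // field does not exist
      return 0;
    }
    DocsEnum docs = terms.docs(MultiFields.getDeletedDocs(r), text, null);
    int sum = 0;
    if (docs != null) {            // null when the term does not exist
      while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
        sum += docs.freq();
      }
    }
    return sum;
  }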
Index: src/java/org/apache/lucene/util/ReaderUtil.java
===================================================================
--- src/java/org/apache/lucene/util/ReaderUtil.java (revision 919975)
+++ src/java/org/apache/lucene/util/ReaderUtil.java (working copy)
-  public static void gatherSubReaders(List allSubReaders, IndexReader reader) {
-    IndexReader[] subReaders = reader.getSequentialSubReaders();
-    if (subReaders == null) {
-      // Add the reader itself, and do not recurse
-      allSubReaders.add(reader);
-    } else {
-      for (int i = 0; i < subReaders.length; i++) {
-        gatherSubReaders(allSubReaders, subReaders[i]);
-      }
+
+  public static void gatherSubReaders(final List allSubReaders, IndexReader reader) {
+    try {
+      new Gather(reader) {
+        @Override
+        protected void add(int base, IndexReader r) {
+          allSubReaders.add(r);
+        }
+      }.run();
+    } catch (IOException ioe) {
+      // won't happen
+      throw new RuntimeException(ioe);
     }
   }

-  public static int gatherSubFields(List readers, List fields, List slices, IndexReader reader, int base) throws IOException {
-    IndexReader[] subReaders = reader.getSequentialSubReaders();
-    if (subReaders == null) {
-      // Add the reader's fields
-      if (readers != null) {
-        readers.add(reader);
+  public static abstract class Gather {
+    private final IndexReader topReader;
+
+    public Gather(IndexReader r) {
+      topReader = r;
+    }
+
+    public int run() throws IOException {
+      return run(0, topReader);
+    }
+
+    public int run(int docBase) throws IOException {
+      return run(docBase, topReader);
+    }
+
+    private int run(int base, IndexReader reader) throws IOException {
+      IndexReader[] subReaders = reader.getSequentialSubReaders();
+      if (subReaders == null) {
+        // atomic reader
+        add(base, reader);
+        base += reader.maxDoc();
+      } else {
+        // composite reader
+        for (int i = 0; i < subReaders.length; i++) {
+          base = run(base, subReaders[i]);
+        }
       }
-      fields.add(reader.fields());
-      slices.add(new Slice(base, reader.maxDoc(), fields.size()-1));
-      base += reader.maxDoc();
-    } else {
-      for (int i = 0; i < subReaders.length; i++) {
-        base = gatherSubFields(readers, fields, slices, subReaders[i], base);
-      }
+
+      return base;
     }
-    return base;
+
+    protected abstract void add(int base, IndexReader r) throws IOException;
   }

   /**
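The new Gather abstraction above replaces the two ad-hoc recursions; a hypothetical usage sketch (caller code, not part of this patch) that collects each atomic leaf together with its doc base:

  // Hypothetical caller code: gather every atomic sub-reader of a
  // (possibly composite) reader plus the doc id offset of each.
  static int collectLeaves(IndexReader topReader,
                           final List<IndexReader> leaves,
                           final List<Integer> docBases) throws IOException {
    return new ReaderUtil.Gather(topReader) {
      @Override
      protected void add(int base, IndexReader r) {
        leaves.add(r);        // r is always atomic (no sub-readers) here
        docBases.add(base);   // first doc id of this leaf in the composite space
      }
    }.run();                  // returns the running base, i.e. topReader.maxDoc()
  }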
Index: src/java/org/apache/lucene/util/Bits.java
===================================================================
--- src/java/org/apache/lucene/util/Bits.java (revision 919975)
+++ src/java/org/apache/lucene/util/Bits.java (working copy)
@@ -17,7 +17,13 @@
  * limitations under the License.
  */

+/**
+ * @lucene.experimental
+ */
+
 public interface Bits {
   public boolean get(int index);
   public int length();
+
+  public static final Bits[] EMPTY_ARRAY = new Bits[0];
 }
Index: src/java/org/apache/lucene/util/MultiBits.java
===================================================================
--- src/java/org/apache/lucene/util/MultiBits.java (revision 919975)
+++ src/java/org/apache/lucene/util/MultiBits.java (working copy)
@@ -17,21 +17,30 @@
  * limitations under the License.
  */

+import java.util.List;
+
 /**
  * Concatenates multiple Bits together, on every lookup.
  *
  * NOTE: This is very costly, as every lookup must
  * do a binary search to locate the right sub-reader.
+ *
+ * @lucene.experimental
  */
+
 public final class MultiBits implements Bits {
   private final Bits[] subs;

   // length is 1+subs.length (the last entry has the maxDoc):
   private final int[] starts;

-  public MultiBits(Bits[] subs, int[] starts) {
-    this.subs = subs;
-    this.starts = starts;
+  public MultiBits(List<Bits> bits, List<Integer> starts) {
+    assert starts.size() == 1+bits.size();
+    this.subs = bits.toArray(Bits.EMPTY_ARRAY);
+    this.starts = new int[starts.size()];
+    for(int i=0;i<this.starts.length;i++) {
+      this.starts[i] = starts.get(i);
+    }
   }
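The per-lookup binary search the javadoc above warns about works along these lines (a hypothetical sketch, not the patch's exact get() body; it assumes a null sub entry means that sub-reader recorded no deletions):

  // Hypothetical sketch of MultiBits.get(): map a global doc id to the
  // owning sub via binary search on starts[] (whose extra trailing
  // entry holds maxDoc), then delegate with a rebased doc id.
  public boolean get(int doc) {
    int i = java.util.Arrays.binarySearch(starts, doc);
    if (i < 0) {
      i = -i - 2;                  // insertion point -> preceding slice
    }
    final Bits sub = subs[i];
    // a null sub means no deletions in that sub-reader
    return sub != null && sub.get(doc - starts[i]);
  }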
Index: src/java/org/apache/lucene/util/BytesRef.java
===================================================================
--- src/java/org/apache/lucene/util/BytesRef.java (revision 919975)
+++ src/java/org/apache/lucene/util/BytesRef.java (working copy)
-      if (i > 0) {
-        sb.append(' ');
-      }
-      sb.append(asUnicodeChar(s.charAt(i)));
-    }
-    sb.append(']');
-    return sb.toString();
-  }
-
   public void copy(BytesRef other) {
     if (bytes == null) {
       bytes = new byte[other.length];
Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
===================================================================
--- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 919975)
+++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy)
@@ -478,7 +478,7 @@
       TermsEnum terms = fields.terms();
       DocsEnum docs = null;
       while(terms.next() != null) {
-        docs = terms.docs(reader.getDeletedDocs(), docs);
+        docs = terms.docs(MultiFields.getDeletedDocs(reader), docs);
         while(docs.nextDoc() != docs.NO_MORE_DOCS) {
           totalTokenCount2 += docs.freq();
         }
Index: contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
===================================================================
--- contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (revision 919975)
+++ contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (working copy)
@@ -202,6 +202,8 @@
       if (oldDels != null) {
         dels.or(oldDels);
       }
+      // nocommit -- not good that this class has to do this...
+      storeDelDocs(null);
     }

     @Override
Index: contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java
===================================================================
--- contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (revision 919975)
+++ contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (working copy)
@@ -100,7 +100,7 @@
       positions.clear();
     }

-    final Bits delDocs = indexReader.getDeletedDocs();
+    final Bits delDocs = MultiFields.getDeletedDocs(indexReader);
     Terms terms = MultiFields.getTerms(indexReader, field);
     boolean anyTerms = false;
Index: contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java
===================================================================
--- contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (revision 919975)
+++ contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (working copy)
@@ -22,6 +22,7 @@
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.Bits;

@@ -83,7 +84,7 @@
   private OpenBitSet correctBits(IndexReader reader) throws IOException {
     OpenBitSet bits = new OpenBitSet(reader.maxDoc()); //assume all are INvalid
-    final Bits delDocs = reader.getDeletedDocs();
+    final Bits delDocs = MultiFields.getDeletedDocs(reader);
     Terms terms = reader.fields().terms(fieldName);
     if (terms != null) {
       TermsEnum termsEnum = terms.iterator();
@@ -121,7 +122,7 @@
     OpenBitSet bits=new OpenBitSet(reader.maxDoc());
     bits.set(0,reader.maxDoc()); //assume all are valid
-    final Bits delDocs = reader.getDeletedDocs();
+    final Bits delDocs = MultiFields.getDeletedDocs(reader);
     Terms terms = reader.fields().terms(fieldName);
     if (terms != null) {
       TermsEnum termsEnum = terms.iterator();
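The contrib changes above all converge on the same pattern: IndexReader.getDeletedDocs() only answers for an atomic reader, so composite readers must go through MultiFields.getDeletedDocs(), which lazily builds a merged view over the sub-readers and caches it via storeDelDocs() -- which is also why MultiPassIndexSplitter must call storeDelDocs(null) after mutating deletions itself. A closing sketch (hypothetical caller code, not part of this patch; the field name is made up):

  // Hypothetical caller code: walk every term of a field, honoring
  // deletions on any reader, atomic or composite.
  static long countLiveOccurrences(IndexReader reader, String field) throws IOException {
    final Bits delDocs = MultiFields.getDeletedDocs(reader);  // null when no deletions
    final Terms terms = MultiFields.getTerms(reader, field);  // null when field absent
    long total = 0;
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      DocsEnum docs = null;
      while (termsEnum.next() != null) {
        docs = termsEnum.docs(delDocs, docs);   // deleted docs are skipped for us
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          total += docs.freq();
        }
      }
    }
    return total;
  }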