Index: src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- src/test/org/apache/lucene/TestExternalCodecs.java (revision 920046) +++ src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -181,11 +181,9 @@ @Override public void finishTerm(BytesRef text, int numDocs) { - // nocommit -- are we even called when numDocs == 0? - if (numDocs > 0) { - assert numDocs == current.docs.size(); - field.termToDocs.put(current.term, current); - } + assert numDocs > 0; + assert numDocs == current.docs.size(); + field.termToDocs.put(current.term, current); } @Override @@ -203,7 +201,7 @@ } @Override - public void addDoc(int docID, int freq) { + public void startDoc(int docID, int freq) { current = new RAMDoc(docID, freq); term.docs.add(current); posUpto = 0; @@ -285,10 +283,10 @@ @Override public SeekStatus seek(BytesRef term) { current = term.utf8ToString(); + it = null; if (ramField.termToDocs.containsKey(current)) { return SeekStatus.FOUND; } else { - // nocommit -- right? if (current.compareTo(ramField.termToDocs.lastKey()) > 0) { return SeekStatus.END; } else { @@ -477,6 +475,7 @@ * You must ensure every field you index has a Codec, or * the defaultCodec is non null. Also, the separate * codecs cannot conflict on file names.*/ + // nocommit -- promote to core public static class PerFieldCodecWrapper extends Codec { private final Map fields = new HashMap(); private final Codec defaultCodec; @@ -523,17 +522,27 @@ fields = codec.fieldsConsumer(state); codecs.put(codec, fields); } - //System.out.println("field " + field.name + " -> codec " + codec); return fields.addField(field); } @Override public void close() throws IOException { Iterator it = codecs.values().iterator(); + IOException err = null; while(it.hasNext()) { - // nocommit -- catch exc and keep closing the rest? - it.next().close(); + try { + it.next().close(); + } catch (IOException ioe) { + // keep first IOException we hit but keep + // closing the rest + if (err == null) { + err = ioe; + } + } } + if (err != null) { + throw err; + } } } @@ -606,17 +615,27 @@ @Override public void close() throws IOException { Iterator it = codecs.values().iterator(); + IOException err = null; while(it.hasNext()) { - // nocommit -- catch exc and keep closing the rest? - it.next().close(); + try { + it.next().close(); + } catch (IOException ioe) { + // keep first IOException we hit but keep + // closing the rest + if (err == null) { + err = ioe; + } + } } + if (err != null) { + throw err; + } } @Override public void loadTermsIndex(int indexDivisor) throws IOException { Iterator it = codecs.values().iterator(); while(it.hasNext()) { - // nocommit -- catch exc and keep closing the rest? 
it.next().loadTermsIndex(indexDivisor); } } Index: src/test/org/apache/lucene/search/TestBoolean2.java =================================================================== --- src/test/org/apache/lucene/search/TestBoolean2.java (revision 920046) +++ src/test/org/apache/lucene/search/TestBoolean2.java (working copy) @@ -198,7 +198,6 @@ } public void testRandomQueries() throws Exception { - // nocommit -- remove 17 seed Random rnd = newRandom(); String[] vals = {"w1","w2","w3","w4","w5","xx","yy","zzz"}; Index: src/test/org/apache/lucene/index/TestSegmentTermEnum.java =================================================================== --- src/test/org/apache/lucene/index/TestSegmentTermEnum.java (revision 920046) +++ src/test/org/apache/lucene/index/TestSegmentTermEnum.java (working copy) @@ -61,8 +61,6 @@ verifyDocFreq(); } - // nocommit - /* public void testPrevTermAtEnd() throws IOException { Directory dir = new MockRAMDirectory(); @@ -70,16 +68,18 @@ addDoc(writer, "aaa bbb"); writer.close(); SegmentReader reader = SegmentReader.getOnlySegmentReader(dir); - SegmentTermEnum termEnum = (SegmentTermEnum) reader.terms(); - assertTrue(termEnum.next()); - assertEquals("aaa", termEnum.term().text()); - assertTrue(termEnum.next()); - assertEquals("aaa", termEnum.prev().text()); - assertEquals("bbb", termEnum.term().text()); - assertFalse(termEnum.next()); - assertEquals("bbb", termEnum.prev().text()); + TermsEnum terms = reader.fields().terms("content").iterator(); + assertNotNull(terms.next()); + assertEquals("aaa", terms.term().utf8ToString()); + assertNotNull(terms.next()); + long ordB = terms.ord(); + assertEquals("bbb", terms.term().utf8ToString()); + assertNull(terms.next()); + + assertEquals(TermsEnum.SeekStatus.FOUND, terms.seek(ordB)); + assertEquals("bbb", terms.term().utf8ToString()); } - */ + private void verifyDocFreq() throws IOException { Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 920046) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -4341,13 +4341,6 @@ new IndexWriter(dir, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED).close(); assertTrue(dir.fileExists("myrandomfile")); - - // Make sure this does not copy myrandomfile: - // nocommit -- Directory.copy now copies all files -- - // how to fix? - //Directory dir2 = new RAMDirectory(dir); - //assertTrue(!dir2.fileExists("myrandomfile")); - } finally { dir.close(); _TestUtil.rmDir(indexDir); Index: src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- src/test/org/apache/lucene/index/TestCodecs.java (revision 920046) +++ src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.store.*; import java.util.*; -// nocommit -- test multiple codecs here? +// TODO: test multiple codecs here? 
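Note on the two PerFieldCodecWrapper close() changes above: the nocommit is resolved with the usual aggregate-and-rethrow idiom -- remember the first IOException, keep closing the remaining consumers, then rethrow. A standalone sketch of that same idiom, for reference only (the closeAll helper and CloseAllSketch class below are hypothetical, not part of this patch):

import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;

final class CloseAllSketch {
  // Close every resource; remember the first IOException we hit but
  // keep closing the rest, and only rethrow once all have been tried.
  static void closeAll(Collection<? extends Closeable> resources) throws IOException {
    IOException err = null;
    for (Closeable c : resources) {
      try {
        c.close();
      } catch (IOException ioe) {
        if (err == null) {
          err = ioe; // keep first IOException, keep closing the rest
        }
      }
    }
    if (err != null) {
      throw err;
    }
  }
}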
// TODO // - test across fields @@ -43,14 +43,11 @@ public class TestCodecs extends LuceneTestCase { - // nocommit -- switch to newRandom(): - private static final Random RANDOM = new Random(42); + private Random RANDOM; private static String[] fieldNames = new String[] {"one", "two", "three", "four"}; private final static int NUM_TEST_ITER = 4000; - // nocommit - //private final static int NUM_TEST_THREADS = 3; - private final static int NUM_TEST_THREADS = 1; + private final static int NUM_TEST_THREADS = 3; private final static int NUM_FIELDS = 4; private final static int NUM_TERMS_RAND = 50; // must be > 16 to test skipping private final static int DOC_FREQ_RAND = 500; // must be > 16 to test skipping @@ -167,7 +164,7 @@ } else { termDocFreq = positions[i].length; } - postingsConsumer.addDoc(docs[i], termDocFreq); + postingsConsumer.startDoc(docs[i], termDocFreq); if (!field.omitTF) { for(int j=0;j= term.docs[1+upto2]) { + upto2++; + } + } + } } else { doc = docsEnum.nextDoc(); assertTrue(doc != -1); @@ -565,16 +578,13 @@ } while (termsEnum.next() != null); assertEquals(upto, field.terms.length); - - //termsEnum.close(); } } } private void write(FieldInfos fieldInfos, Directory dir, FieldData[] fields) throws Throwable { - // nocommit -- randomize this: - final int termIndexInterval = 16; + final int termIndexInterval = nextInt(13, 27); SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, null, 10000, 10000, termIndexInterval, Codecs.getDefault()); Index: src/test/org/apache/lucene/index/TestOmitTf.java =================================================================== --- src/test/org/apache/lucene/index/TestOmitTf.java (revision 920046) +++ src/test/org/apache/lucene/index/TestOmitTf.java (working copy) @@ -80,7 +80,7 @@ // keep things constant d = new Document(); - // Reverese + // Reverse f1.setOmitTermFreqAndPositions(true); d.add(f1); @@ -90,8 +90,9 @@ Random rnd = newRandom(); writer.addDocument(d); + FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); + // force merge - FlexTestUtil.verifyFlexVsPreFlex(rnd, writer); writer.optimize(); // flush writer.close(); Index: src/test/org/apache/lucene/index/FlexTestUtil.java =================================================================== --- src/test/org/apache/lucene/index/FlexTestUtil.java (revision 920046) +++ src/test/org/apache/lucene/index/FlexTestUtil.java (working copy) @@ -25,7 +25,6 @@ public class FlexTestUtil { - // nocommit: // index variations // need del docs // need payloads @@ -73,8 +72,7 @@ public static void verifyFlexVsPreFlex(Random rand, IndexReader r) throws Exception { // First test on DirReader - // nocommit turn back on - // verifyFlexVsPreFlexSingle(rand, r); + verifyFlexVsPreFlexSingle(rand, r); // Then on each individual sub reader IndexReader[] subReaders = r.getSequentialSubReaders(); @@ -86,24 +84,18 @@ } // Then on a new MultiReader - // nocommit -- back on: - if (false) { - IndexReader m = new MultiReader(subReaders, false); - verifyFlexVsPreFlexSingle(rand, m); - m.close(); - } + IndexReader m = new MultiReader(subReaders, false); + verifyFlexVsPreFlexSingle(rand, m); + m.close(); // Then on a forced-external reader (forced flex to // emulate API on pre-flex API, which in turn is // emulating pre-flex on flex -- twisted, but, better // work): - // nocommit back on - if (false) { - verifyFlexVsPreFlexSingle(rand, new ForcedExternalReader(r)); - IndexReader m = new MultiReader(forcedSubReaders, false); - verifyFlexVsPreFlexSingle(rand, m); - m.close(); - } + 
verifyFlexVsPreFlexSingle(rand, new ForcedExternalReader(r)); + m = new MultiReader(forcedSubReaders, false); + verifyFlexVsPreFlexSingle(rand, m); + m.close(); } private static void verifyFlexVsPreFlexSingle(Random rand, IndexReader r) throws Exception { @@ -119,9 +111,13 @@ // straight enum of fields/terms/docs/positions TermEnum termEnum = r.terms(); - FieldsEnum fields = r.fields().iterator(); + final Fields fields = MultiFields.getFields(r); + if (fields == null) { + return; + } + FieldsEnum fieldsEnum = fields.iterator(); while(true) { - final String field = fields.next(); + final String field = fieldsEnum.next(); if (field == null) { boolean result = termEnum.next(); if (result) { @@ -130,7 +126,7 @@ assertFalse(result); break; } - TermsEnum terms = fields.terms(); + TermsEnum terms = fieldsEnum.terms(); DocsAndPositionsEnum postings = null; DocsEnum docsEnum = null; final TermPositions termPos = r.termPositions(); @@ -146,8 +142,8 @@ assertEquals(termEnum.docFreq(), terms.docFreq()); //allTerms.add(t); - postings = terms.docsAndPositions(r.getDeletedDocs(), postings); - docsEnum = terms.docs(r.getDeletedDocs(), docsEnum); + postings = terms.docsAndPositions(MultiFields.getDeletedDocs(r), postings); + docsEnum = terms.docs(MultiFields.getDeletedDocs(r), docsEnum); final DocsEnum docs; if (postings != null) { @@ -166,16 +162,18 @@ assertTrue(termPos.next()); assertEquals(termPos.doc(), doc); assertEquals(termPos.freq(), docs.freq()); - //System.out.println("TEST: doc=" + doc + " freq=" + docs.freq()); final int freq = docs.freq(); if (postings == null) { assertEquals(1, freq); - assertEquals(0, termPos.nextPosition()); + // Old API did not always do this, + // specifically in the MultiTermPositions + // case when some segs omit positions and + // some don't + //assertEquals(0, termPos.nextPosition()); assertEquals(false, termPos.isPayloadAvailable()); } else { for(int i=0;i= Byte.MIN_VALUE && num <= Byte.MAX_VALUE) { - return (byte) num; - } else { - throw new IllegalArgumentException("value \"" + term + "\" is out of bounds for Byte"); - } + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // NumericField, instead, which already decodes + // directly from byte[] + return Byte.parseByte(term.utf8ToString()); } protected Object readResolve() { return DEFAULT_BYTE_PARSER; @@ -169,12 +168,11 @@ /** The default parser for short values, which are encoded by {@link Short#toString(short)} */ public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() { public short parseShort(BytesRef term) { - final long num = FieldCacheImpl.parseLong(term); - if (num >= Short.MIN_VALUE && num <= Short.MAX_VALUE) { - return (short) num; - } else { - throw new IllegalArgumentException("value \"" + term + "\" is out of bounds for Short"); - } + // TODO: would be far better to directly parse from + // UTF8 bytes... 
but really users should use + // NumericField, instead, which already decodes + // directly from byte[] + return Short.parseShort(term.utf8ToString()); } protected Object readResolve() { return DEFAULT_SHORT_PARSER; @@ -188,12 +186,11 @@ /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */ public static final IntParser DEFAULT_INT_PARSER = new IntParser() { public int parseInt(BytesRef term) { - final long num = FieldCacheImpl.parseLong(term); - if (num >= Integer.MIN_VALUE && num <= Integer.MAX_VALUE) { - return (int) num; - } else { - throw new IllegalArgumentException("value \"" + term + "\" is out of bounds for Int"); - } + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // NumericField, instead, which already decodes + // directly from byte[] + return Integer.parseInt(term.utf8ToString()); } protected Object readResolve() { return DEFAULT_INT_PARSER; @@ -207,8 +204,10 @@ /** The default parser for float values, which are encoded by {@link Float#toString(float)} */ public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() { public float parseFloat(BytesRef term) { - // TODO: would be far better to directly parse - // the UTF-8 bytes into float, but that's tricky? + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // NumericField, instead, which already decodes + // directly from byte[] return Float.parseFloat(term.utf8ToString()); } protected Object readResolve() { @@ -223,7 +222,11 @@ /** The default parser for long values, which are encoded by {@link Long#toString(long)} */ public static final LongParser DEFAULT_LONG_PARSER = new LongParser() { public long parseLong(BytesRef term) { - return FieldCacheImpl.parseLong(term); + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // NumericField, instead, which already decodes + // directly from byte[] + return Long.parseLong(term.utf8ToString()); } protected Object readResolve() { return DEFAULT_LONG_PARSER; @@ -237,8 +240,10 @@ /** The default parser for double values, which are encoded by {@link Double#toString(double)} */ public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() { public double parseDouble(BytesRef term) { - // TODO: would be far better to directly parse - // the UTF-8 bytes into float, but that's tricky? + // TODO: would be far better to directly parse from + // UTF8 bytes... 
but really users should use + // NumericField, instead, which already decodes + // directly from byte[] return Double.parseDouble(term.utf8ToString()); } protected Object readResolve() { Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -26,6 +26,9 @@ import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.Terms; import org.apache.lucene.queryParser.QueryParser; // for javadoc import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; @@ -186,13 +189,28 @@ private abstract static class BooleanQueryRewrite extends RewriteMethod { protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { + if (query.hasNewAPI) { + + if (query.field == null) { + throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); + } + + final Fields fields = MultiFields.getFields(reader); + if (fields == null) { + // reader has no fields + return 0; + } + + final Terms terms = fields.terms(query.field); + if (terms == null) { + // field does not exist + return 0; + } + final TermsEnum termsEnum = query.getTermsEnum(reader); - if (termsEnum == null) { - return 0; // nocommit, subclass shouldn't deal with this case of nonexistent field - } - if (query.field == null) - throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); + assert termsEnum != null; + if (termsEnum == TermsEnum.EMPTY) return 0; final BoostAttribute boostAtt = @@ -384,7 +402,7 @@ public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { Query result = super.rewrite(reader, query); assert result instanceof BooleanQuery; - // nocommit: if empty boolean query return NullQuery + // TODO: if empty boolean query return NullQuery? if (!((BooleanQuery) result).clauses().isEmpty()) { // strip the scores off result = new ConstantScoreQuery(new QueryWrapperFilter(result)); @@ -614,12 +632,13 @@ } /** Construct the enumeration to be used, expanding the - * pattern term. This method must return null if no - * terms fall in the range; else, it must return a - * TermsEnum already positioned to the first matching - * term. - * - * nocommit in 3.x this will become abstract */ + * pattern term. This method should only be called if + * the field exists (ie, implementations can assume the + * field does exist). This method should not return null + * (should instead return {@link TermsEnum#EMPTY} if no + * terms match). The TermsEnum must already be + * positioned to the first matching term. 
*/ + // TODO 4.0: make this method abstract protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { throw new UnsupportedOperationException(); } Index: src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 920046) +++ src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ -21,10 +21,13 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.Bits; @@ -107,17 +110,31 @@ @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { if (query.hasNewAPI) { + if (query.field == null) { + throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery."); + } + + final Fields fields = MultiFields.getFields(reader); + if (fields == null) { + // reader has no fields + return DocIdSet.EMPTY_DOCIDSET; + } + + final Terms terms = fields.terms(query.field); + if (terms == null) { + // field does not exist + return DocIdSet.EMPTY_DOCIDSET; + } + final TermsEnum termsEnum = query.getTermsEnum(reader); - if (termsEnum == null) { - return DocIdSet.EMPTY_DOCIDSET;// nocommit; - } + assert termsEnum != null; if (termsEnum.next() != null) { // fill into a OpenBitSet final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); final int[] docs = new int[32]; final int[] freqs = new int[32]; int termCount = 0; - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docsEnum = null; do { termCount++; Index: src/java/org/apache/lucene/search/MatchAllDocsQuery.java =================================================================== --- src/java/org/apache/lucene/search/MatchAllDocsQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/MatchAllDocsQuery.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; @@ -54,7 +55,7 @@ MatchAllScorer(IndexReader reader, Similarity similarity, Weight w, byte[] norms) throws IOException { super(similarity); - delDocs = reader.getDeletedDocs(); + delDocs = MultiFields.getDeletedDocs(reader); score = w.getValue(); maxDoc = reader.maxDoc(); this.norms = norms; Index: src/java/org/apache/lucene/search/Similarity.java =================================================================== --- src/java/org/apache/lucene/search/Similarity.java (revision 920046) +++ src/java/org/apache/lucene/search/Similarity.java (working copy) @@ -857,7 +857,7 @@ * @return An implementation dependent float to be used as a scoring factor * */ - // nocommit -- swtich to BytesRef + // TODO: maybe switch this API to BytesRef? 
public float scorePayload(int docId, String fieldName, int start, int end, byte [] payload, int offset, int length) { return 1; Index: src/java/org/apache/lucene/search/FieldCacheRangeFilter.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 920046) +++ src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Bits; import org.apache.lucene.document.NumericField; // for javadocs @@ -537,7 +538,7 @@ if (isCacheable()) { skipDocs = null; } else { - skipDocs = reader.getDeletedDocs(); + skipDocs = MultiFields.getDeletedDocs(reader); } } final int maxDoc = reader.maxDoc(); Index: src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 920046) +++ src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -19,9 +19,9 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -127,7 +127,8 @@ @Override public BytesRef term() throws IOException { - return (tenum == null) ? null : tenum.term(); + assert tenum != null; + return tenum.term(); } @Override @@ -137,7 +138,8 @@ @Override public int docFreq() { - return (tenum == null) ? -1 : tenum.docFreq(); + assert tenum != null; + return tenum.docFreq(); } /** This enum does not support seeking! @@ -158,17 +160,20 @@ @Override public long ord() throws IOException { - return (tenum == null) ? -1 : tenum.ord(); + assert tenum != null; + return tenum.ord(); } @Override public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException { - return (tenum == null) ? null : tenum.docs(bits, reuse); + assert tenum != null; + return tenum.docs(bits, reuse); } @Override public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { - return (tenum == null) ? null : tenum.docsAndPositions(bits, reuse); + assert tenum != null; + return tenum.docsAndPositions(bits, reuse); } @Override Index: src/java/org/apache/lucene/search/spans/SpanTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/spans/SpanTermQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/spans/SpanTermQuery.java (working copy) @@ -86,7 +86,7 @@ // NOTE: debateably, the caller should never pass in a // multi reader... 
final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(reader, - reader.getDeletedDocs(), + MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); Index: src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- src/java/org/apache/lucene/search/PrefixQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -20,9 +20,9 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; @@ -52,6 +52,7 @@ @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { if (prefix.text().length() == 0) { + // no prefix -- match all terms for this field: final Terms terms = MultiFields.getTerms(reader, getField()); return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; } Index: src/java/org/apache/lucene/search/MultiPhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; @@ -178,7 +179,7 @@ if (terms.length > 1) { postingsEnum = new UnionDocsAndPositionsEnum(reader, terms); } else { - postingsEnum = reader.termPositionsEnum(reader.getDeletedDocs(), + postingsEnum = reader.termPositionsEnum(MultiFields.getDeletedDocs(reader), terms[0].field(), new BytesRef(terms[0].text())); } @@ -382,8 +383,7 @@ * Takes the logical union of multiple DocsEnum iterators. */ -// nocommit -- this must carefully take union of attr source -// as well -- this is tricky +// TODO: if ever we allow subclassing of the *PhraseScorer class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { private static final class DocsQueue extends PriorityQueue { @@ -454,7 +454,7 @@ public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException { List docsEnums = new LinkedList(); - final Bits delDocs = indexReader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(indexReader); for (int i = 0; i < terms.length; i++) { DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs, terms[i].field(), Index: src/java/org/apache/lucene/search/function/ValueSourceQuery.java =================================================================== --- src/java/org/apache/lucene/search/function/ValueSourceQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/function/ValueSourceQuery.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; import org.apache.lucene.index.Term; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.Bits; @@ -137,7 +138,7 @@ qWeight = w.getValue(); // this is when/where the values are first created. 
vals = valSrc.getValues(reader); - delDocs = reader.getDeletedDocs(); + delDocs = MultiFields.getDeletedDocs(reader); maxDoc = reader.maxDoc(); } Index: src/java/org/apache/lucene/search/PhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/PhraseQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Explanation.IDFExplanation; @@ -154,21 +155,18 @@ return null; DocsAndPositionsEnum[] postings = new DocsAndPositionsEnum[terms.size()]; - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); for (int i = 0; i < terms.size(); i++) { final Term t = terms.get(i); final BytesRef text = new BytesRef(t.text()); - // NOTE: debateably, the caller should never pass in a - // multi reader... DocsAndPositionsEnum postingsEnum = MultiFields.getTermPositionsEnum(reader, delDocs, t.field(), text); + // PhraseQuery on a field that did not index + // positions. if (postingsEnum == null) { - if (MultiFields.getTermDocsEnum(reader, - delDocs, - t.field(), - text) != null) { + if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), text) != null) { // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")"); } else { Index: src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -144,7 +144,7 @@ // NOTE: debateably, the caller should never pass in a // multi reader... final Terms terms = MultiFields.getTerms(reader, field); - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return (terms != null) ? terms.iterator() : null; } return new TermRangeTermsEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); Index: src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -75,7 +75,7 @@ public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { // NOTE: debateably, the caller should never pass in a // multi reader... 
- DocsEnum docs = MultiFields.getTermDocsEnum(reader, reader.getDeletedDocs(), term.field(), new BytesRef(term.text())); + DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); if (docs == null) { return null; } @@ -118,7 +118,7 @@ Explanation tfExplanation = new Explanation(); int tf = 0; - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), new BytesRef(term.text())); + DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text())); if (docs != null) { int newDoc = docs.advance(doc); if (newDoc == doc) { Index: src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 920046) +++ src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -283,7 +283,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -337,7 +337,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -396,7 +396,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -468,7 +468,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -536,7 +536,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -606,7 +606,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; try { while(true) { @@ -658,7 +658,7 @@ Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { final BytesRef term = termsEnum.next(); @@ -711,7 +711,7 @@ if (terms != null) { final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docs = null; while(true) { final BytesRef term = termsEnum.next(); @@ -763,29 +763,5 @@ public PrintStream getInfoStream() { return infoStream; } - - // Directly parses a numeric value from UTF8 bytes - // nocommit -- whitespace? +e syntax? 
- final static long parseLong(BytesRef term) { - int upto = term.offset; - final int negMul; - if (term.bytes[upto] == '-') { - negMul = -1; - upto++; - } else { - negMul = 1; - } - final int end = term.offset + term.length; - long number = 0; - while(upto < end) { - final int b = term.bytes[upto++]; - if (b >= '0' && b <= '9') { - number = 10*number + (int) (b-'0'); - } else { - throw new NumberFormatException("could not parse \"" + term.utf8ToString() + "\" to a number"); - } - } - return negMul * number; - } } Index: src/java/org/apache/lucene/search/AutomatonQuery.java =================================================================== --- src/java/org/apache/lucene/search/AutomatonQuery.java (revision 920046) +++ src/java/org/apache/lucene/search/AutomatonQuery.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; @@ -79,9 +78,7 @@ // matches all possible strings if (BasicOperations.isTotal(automaton)) { - final Terms terms = MultiFields.getTerms(reader, getField()); - // nocommit -- should we just return null? singleton? - return (terms != null) ? terms.iterator() : TermsEnum.EMPTY; + return MultiFields.getTerms(reader, getField()).iterator(); } // matches a fixed string in singleton representation Index: src/java/org/apache/lucene/index/LegacyFieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFieldsEnum.java (revision 920046) +++ src/java/org/apache/lucene/index/LegacyFieldsEnum.java (working copy) @@ -197,11 +197,11 @@ this.term = term; td.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs()"); } return this; @@ -256,11 +256,11 @@ this.term = term; tp.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs() skipDocs=" + skipDocs + " MultiFields.getDeletedDocs=" + MultiFields.getDeletedDocs(r) + " r=" + r); } return this; Index: src/java/org/apache/lucene/index/FieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/FieldsEnum.java (revision 920046) +++ src/java/org/apache/lucene/index/FieldsEnum.java (working copy) @@ -28,7 +28,8 @@ public abstract class FieldsEnum { - public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + // TODO: maybe allow retrieving FieldInfo for current + // field, as optional method? private AttributeSource atts = null; @@ -36,25 +37,38 @@ * Returns the related attributes. 
*/ public AttributeSource attributes() { - if (atts == null) atts = new AttributeSource(); + if (atts == null) { + atts = new AttributeSource(); + } return atts; } - // nocommit -- do we need seek? - // nocommit -- should this return FieldInfo? /** Increments the enumeration to the next field. The * returned field is always interned, so simple == * comparison is allowed. Returns null when there are no * more fields.*/ public abstract String next() throws IOException; - // nocommit should we add a field()? fieldInfo()? - // mirrors TermsEnum /** Get {@link TermsEnum} for the current field. You - * should not call {@link #next()} until you're done - * using this {@link TermsEnum}. After {@link #next} - * returns null, this method should not be called. */ + * should not call {@link #next} until you're done using + * this {@link TermsEnum}. After {@link #next} returns + * null this method should not be called. This method + * will not return null. */ public abstract TermsEnum terms() throws IOException; + + public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + + /** Provides zero fields */ + public final static FieldsEnum EMPTY = new FieldsEnum() { + + @Override + public String next() { + return null; + } + + @Override + public TermsEnum terms() { + throw new IllegalStateException("this method should never be called"); + } + }; } - Index: src/java/org/apache/lucene/index/MultiTerms.java =================================================================== --- src/java/org/apache/lucene/index/MultiTerms.java (revision 920046) +++ src/java/org/apache/lucene/index/MultiTerms.java (working copy) @@ -1,6 +1,5 @@ package org.apache.lucene.index; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.
See the NOTICE file distributed with @@ -72,7 +71,7 @@ if (termsEnums.size() > 0) { return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY)); } else { - return null; + return TermsEnum.EMPTY; } } Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 920046) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -90,8 +90,6 @@ private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false - // nocommit: unread field - private boolean flexPostings; // True if postings were written with new flex format private Codec codec; @@ -110,17 +108,9 @@ docStoreIsCompoundFile = false; delCount = 0; hasProx = true; - flexPostings = true; this.codec = codec; } - // nocommit -- this ctor is only used by back-compat tests - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { - this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true, null); - SegmentWriteState state = new SegmentWriteState(null, dir, name, null, null, docCount, docCount, -1, Codecs.getDefault()); - codec = state.codec = Codecs.getDefault().getWriter(state); - } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile, int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) { @@ -627,11 +617,9 @@ if (useCompoundFile) { files.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { - final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) { addIfExists(files, IndexFileNames.segmentFileName(name, ext)); } - // nocommit -- only does ifExists on prx for standard codec codec.files(dir, this, files); } Index: src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- src/java/org/apache/lucene/index/ParallelReader.java (revision 920046) +++ src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -171,7 +171,7 @@ if (terms != null) { return terms.iterator(); } else { - return null; + return TermsEnum.EMPTY; } } } @@ -196,7 +196,7 @@ @Override public Bits getDeletedDocs() throws IOException { - return ((IndexReader) readers.get(0)).getDeletedDocs(); + return MultiFields.getDeletedDocs(readers.get(0)); } @Override Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 920046) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -57,7 +57,6 @@ private SegmentInfo si; private int readBufferSize; - boolean isPreFlex; CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal(); CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); @@ -835,7 +834,7 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -852,7 +851,7 @@ @Override public TermEnum terms(Term t) throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // 
converting old API -> new API -> old API, just give // direct access to old: @@ -901,7 +900,7 @@ @Override public TermDocs termDocs() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -921,7 +920,7 @@ @Override public TermPositions termPositions() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -1300,19 +1299,11 @@ // This is necessary so that cloned SegmentReaders (which // share the underlying postings data) will map to the // same entry in the FieldCache. See LUCENE-1579. - // nocommit - what to return here? @Override public final Object getFieldCacheKey() { return core; } - // nocommit: missing? - //@Override - //public long getUniqueTermCount() { - // return core.getTermsReader().size(); - //} - - /** * Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
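For context on the pre-flex bridging above: on an old (pre-flex) segment the legacy TermEnum/TermDocs API is served directly, while everywhere else it is emulated on top of the flex API. A minimal sketch, not from this patch, of the same term walk in both APIs; the field name "content" is a placeholder:

import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;

final class EnumWalkSketch {
  // Pre-flex: enumerate all terms of one field via TermEnum.
  static void oldApi(IndexReader reader) throws java.io.IOException {
    TermEnum tenum = reader.terms(new Term("content", ""));
    try {
      while (tenum.term() != null && "content".equals(tenum.term().field())) {
        // ... use tenum.term().text(), tenum.docFreq() ...
        if (!tenum.next()) {
          break;
        }
      }
    } finally {
      tenum.close();
    }
  }

  // Flex: the same walk via Terms/TermsEnum.
  static void newApi(IndexReader reader) throws java.io.IOException {
    Terms terms = MultiFields.getTerms(reader, "content");
    if (terms != null) {                 // null -> field does not exist
      TermsEnum termsEnum = terms.iterator();
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        // ... use text.utf8ToString(), termsEnum.docFreq() ...
      }
    }
  }
}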
@@ -1363,9 +1354,6 @@ } else if (t != null) { // Pre-seek to this term - // nocommit -- inefficient; do we need - // FieldsEnum.seek? (but this is slow only for - // legacy API, and, when field count is high) while(currentField.compareTo(t.field) < 0) { currentField = fields.next(); if (currentField == null) { @@ -1379,7 +1367,6 @@ // We found some field -- get its terms: terms = fields.terms(); - // nocommit: confirm inlining is working! if (currentField == t.field) { // We found exactly the requested field; now // seek the term text: @@ -1486,7 +1473,6 @@ public void close() {} public void seek(TermEnum termEnum) throws IOException { - // nocommit -- optimize for the special cases here seek(termEnum.term()); } @@ -1590,7 +1576,6 @@ public void close() {} public void seek(TermEnum termEnum) throws IOException { - // nocommit -- optimize for the special cases here seek(termEnum.term()); } Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 920046) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -384,9 +384,9 @@ public Object clone() { SegmentInfos sis = (SegmentInfos) super.clone(); for(int i=0;i(userData); return sis; Index: src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- src/java/org/apache/lucene/index/CheckIndex.java (revision 920046) +++ src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -492,7 +492,7 @@ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); // Test the Term Index - segInfoStat.termIndexStatus = testTermIndex(info, reader); + segInfoStat.termIndexStatus = testTermIndex(reader); // Test Stored Fields segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf); @@ -575,7 +575,7 @@ /** * Test the term index. */ - private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) { + private Status.TermIndexStatus testTermIndex(SegmentReader reader) { final Status.TermIndexStatus status = new Status.TermIndexStatus(); final int maxDoc = reader.maxDoc(); Index: src/java/org/apache/lucene/index/Terms.java =================================================================== --- src/java/org/apache/lucene/index/Terms.java (revision 920046) +++ src/java/org/apache/lucene/index/Terms.java (working copy) @@ -29,17 +29,16 @@ public abstract class Terms { - public final static Terms[] EMPTY_ARRAY = new Terms[0]; - // Privately cache a TermsEnum per-thread for looking up // docFreq and getting a private DocsEnum private final CloseableThreadLocal threadEnums = new CloseableThreadLocal(); - /** Returns an iterator that will step through all terms */ + /** Returns an iterator that will step through all + * terms. This method will not return null.*/ public abstract TermsEnum iterator() throws IOException; /** Return the BytesRef Comparator used to sort terms - * provided by the iterator. NOTE: this may return null + * provided by the iterator. This method may return null * if there are no terms. This method may be invoked * many times; it's best to cache a single instance & * reuse it. */ @@ -57,9 +56,8 @@ } } - // nocommit -- or maybe make a separate positions(...) method? - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsEnum for the specified term. This method may + * return null if the term does not exist. 
*/ public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { @@ -69,8 +67,9 @@ } } - /** Get DocsEnum for the specified term. Returns null if - the term does not exist. */ + /** Get DocsAndPositionsEnum for the specified term. This + method may return null if the term does not exist, or + positions were not indexed. */ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { @@ -97,4 +96,5 @@ protected void close() { threadEnums.close(); } + public final static Terms[] EMPTY_ARRAY = new Terms[0]; } Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 920046) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -30,7 +30,6 @@ import org.apache.lucene.index.DirectoryReader.MultiTermPositions; // deprecated import org.apache.lucene.search.Similarity; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close -import org.apache.lucene.util.MultiBits; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; @@ -46,8 +45,6 @@ private int maxDoc = 0; private int numDocs = -1; private boolean hasDeletions = false; - private MultiBits deletedDocs; - //private Fields fields; /** *

Construct a MultiReader aggregating the named set of (sub)readers. @@ -76,7 +73,6 @@ this.subReaders = subReaders.clone(); starts = new int[subReaders.length + 1]; // build starts array decrefOnClose = new boolean[subReaders.length]; - Bits[] subs = new Bits[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { starts[i] = maxDoc; @@ -92,7 +88,6 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; } - subs[i] = subReaders[i].getDeletedDocs(); final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), @@ -101,21 +96,21 @@ } starts[subReaders.length] = maxDoc; - if (hasDeletions) { - deletedDocs = new MultiBits(subs, starts); - } else { - deletedDocs = null; - } } @Override + public long getUniqueTermCount() throws IOException { + throw new UnsupportedOperationException(""); + } + + @Override public int getSubReaderDocBase(IndexReader subReader) { return subReaderToSlice.get(subReader).start; } @Override public Fields fields() throws IOException { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } /** @@ -162,11 +157,7 @@ @Override public Bits getDeletedDocs() throws IOException { - if (subReaders.length == 1) { - return subReaders[0].getDeletedDocs(); - } else { - return deletedDocs; - } + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } /** Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 920046) +++ src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -30,7 +30,7 @@ private AttributeSource atts = null; - // nocommit + // nocommit -- debugging public String desc; /** Returns term frequency in the current document. Do @@ -44,11 +44,10 @@ return atts; } - // nocommit -- state in API that doc/freq are undefined - // (defined?) after this? // nocommit -- fix this API so that intblock codecs are // able to return their own int arrays, to save a copy... IntsRef? - /** Bulk read: returns number of docs read. + /** Bulk read: returns number of docs read. After this is + * called, {@link #doc} and {@link #freq} are undefined. * *

NOTE: the default impl simply delegates to {@link * #nextDoc}, but subclasses may do this more Index: src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/DirectoryReader.java (revision 920046) +++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -39,7 +39,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.MultiBits; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close @@ -352,6 +351,7 @@ buffer.append('('); for(SegmentReader r : subReaders) { buffer.append(r); + buffer.append(' '); } buffer.append(')'); return buffer.toString(); @@ -360,7 +360,6 @@ private void initialize(SegmentReader[] subReaders) throws IOException { this.subReaders = subReaders; starts = new int[subReaders.length + 1]; // build starts array - Bits[] subs = new Bits[subReaders.length]; final List subFields = new ArrayList(); final List fieldSlices = new ArrayList(); @@ -371,7 +370,6 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; - subs[i] = subReaders[i].getDeletedDocs(); } final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i); @@ -384,20 +382,11 @@ } } starts[subReaders.length] = maxDoc; - - if (hasDeletions) { - deletedDocs = new MultiBits(subs, starts); - } else { - deletedDocs = null; - } } - private Bits deletedDocs; - @Override public Bits getDeletedDocs() { - // nocommit -- maybe not supported? - return deletedDocs; + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } @Override @@ -722,7 +711,6 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - //nocommit: investigate this opto if (subReaders.length == 1) { // Optimize single segment case: return subReaders[0].terms(); @@ -785,11 +773,7 @@ @Override public Fields fields() throws IOException { - if (subReaders.length == 0) { - return null; - } else { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); - } + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } @Override @@ -946,6 +930,11 @@ } @Override + public long getUniqueTermCount() throws IOException { + throw new UnsupportedOperationException(""); + } + + @Override public Map getCommitUserData() { ensureOpen(); return segmentInfos.getUserData(); Index: src/java/org/apache/lucene/index/TermsHash.java =================================================================== --- src/java/org/apache/lucene/index/TermsHash.java (revision 920046) +++ src/java/org/apache/lucene/index/TermsHash.java (working copy) @@ -44,9 +44,6 @@ final int postingsFreeChunk; final DocumentsWriter docWriter; - //nocommit: did I lose code for this below in merge? 
- private TermsHash primaryTermsHash; - private RawPostingList[] postingsFreeList = new RawPostingList[1]; private int postingsFreeCount; private int postingsAllocCount; Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 920046) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -175,14 +175,6 @@ // Basic files for (String ext : IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC) { - - // nocommit - /* - if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx()) - continue; - - */ - if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) && !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) files.add(IndexFileNames.segmentFileName(segment, ext)); @@ -571,12 +563,25 @@ final List fields = new ArrayList(); final List subReaders = new ArrayList(); final List slices = new ArrayList(); + final List bits = new ArrayList(); + final List bitsStarts = new ArrayList(); final int numReaders = readers.size(); for(int i=0;iNOTE: if this is a multi reader ({@link * #getSequentialSubReaders} is not null) then this @@ -914,16 +914,20 @@ public abstract int docFreq(Term t) throws IOException; /** Returns the number of documents containing the term - * t. This method does not take into - * account deleted documents that have not yet been - * merged away. */ + * t. This method returns 0 if the term or + * field does not exists. This method does not take into + * account deleted documents that have not yet been merged + * away. */ public int docFreq(String field, BytesRef term) throws IOException { - final Terms terms = fields().terms(field); - if (terms != null) { - return terms.docFreq(term); - } else { + final Fields fields = fields(); + if (fields == null) { return 0; } + final Terms terms = fields.terms(field); + if (terms == null) { + return 0; + } + return terms.docFreq(term); } /** Returns an enumeration of all the documents which contain @@ -948,64 +952,50 @@ return termDocs; } + /** This may return null if the field does not exist.*/ public Terms terms(String field) throws IOException { final Fields fields = fields(); - if (fields != null) { - return fields.terms(field); - } else { + if (fields == null) { return null; } + return fields.terms(field); } /** Returns {@link DocsEnum} for the specified field & - * term. This may return null, for example if either the - * field or term does not exist. */ + * term. This may return null, if either the field or + * term does not exist. */ public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; final Fields fields = fields(); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("ir.termDocsEnum field=" + field + " term=" + term + " terms=" + terms + " this=" + this); - } - final DocsEnum docs = terms.docs(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("ir.termDocsEnum field=" + field + " docs=" +docs); - } - return docs; - } + if (fields == null) { + return null; } - - return null; + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docs(skipDocs, term, null); + } else { + return null; + } } /** Returns {@link DocsAndPositionsEnum} for the specified - * field & term. This may return null, for example if - * either the field or term does not exist. */ + * field & term. 
This may return null if either the + field or term does not exist, or if positions were not + stored for this term. */ public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; - final Fields fields = fields(); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("ir.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms + " this=" + this); - } - final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("ir.termPositionsEnum field=" + field + " postings=" +postings); - } - return postings; - } + if (fields == null) { + return null; } - - return null; + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, null); + } else { + return null; + } } /** Returns an unpositioned {@link TermDocs} enumerator. @@ -1230,12 +1220,14 @@ } } - /** - * Returns the {@link Bits} representing deleted docs. A - * set bit indicates the doc ID has been deleted. This - * method should return null when there are no deleted - * docs. */ private Bits deletedDocsBits; + + /** Returns the {@link Bits} representing deleted docs. A + * set bit indicates the doc ID has been deleted. This + * method should return null when there are no deleted + * docs. + * + * @lucene.experimental */ public Bits getDeletedDocs() throws IOException { if (deletedDocsBits == null) { deletedDocsBits = new DeletedDocsBits(); @@ -1397,10 +1389,6 @@ /** Returns the number of unique terms (across all fields) * in this reader. * - * This method returns long, even though internally - * Lucene cannot handle more than 2^31 unique terms, for - * a possible future when this limitation is removed. - * * @throws UnsupportedOperationException if this count * cannot be easily determined (eg Multi*Readers). * Instead, you should call {@link * #getSequentialSubReaders} and ask each sub reader for * its unique term count.
*/ public long getUniqueTermCount() throws IOException { long numTerms = 0; - FieldsEnum it = fields().iterator(); + final Fields fields = fields(); + if (fields == null) { + return 0; + } + FieldsEnum it = fields.iterator(); while(true) { String field = it.next(); if (field == null) { break; } - numTerms += fields().terms(field).getUniqueTermCount(); + numTerms += fields.terms(field).getUniqueTermCount(); } return numTerms; } @@ -1431,13 +1423,25 @@ private Fields fields; - /** lucene.experimental */ - public void storeFields(Fields fields) { + /** lucene.internal */ + void storeFields(Fields fields) { this.fields = fields; } - /** lucene.experimental */ - public Fields retrieveFields() { + /** lucene.internal */ + Fields retrieveFields() { return fields; } + + private Bits storedDelDocs; + + /** lucene.internal */ + void storeDelDocs(Bits delDocs) { + this.storedDelDocs = delDocs; + } + + /** lucene.internal */ + Bits retrieveDelDocs() { + return storedDelDocs; + } } Index: src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 920046) +++ src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -35,8 +35,8 @@ public final class MultiTermsEnum extends TermsEnum { private final TermMergeQueue queue; - private final TermsEnumWithSlice[] subs; - private final TermsEnumWithSlice[] currentSubs; + private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader) + private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field private final TermsEnumWithSlice[] top; private final MultiDocsEnum.EnumWithSlice[] subDocs; private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions; @@ -93,7 +93,7 @@ /** The terms array must be newly created TermsEnum, ie * {@link TermsEnum#next} has not yet been called. 
*/ - public MultiTermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException { + public TermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException { assert termsEnumsIndex.length <= top.length; numSubs = 0; numTop = 0; @@ -128,7 +128,7 @@ } if (queue.size() == 0) { - return null; + return TermsEnum.EMPTY; } else { return this; } @@ -141,12 +141,12 @@ for(int i=0;i last one + assert currentField == null || currentField.compareTo(term.field()) < 0; currentField = term.field(); Terms terms = fields.terms(currentField); if (terms != null) { @@ -1030,8 +1029,10 @@ if (termsEnum == null) { continue; } + assert checkDeleteTerm(term); termRef.copy(term.text()); + if (termsEnum.seek(termRef) == TermsEnum.SeekStatus.FOUND) { DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); Index: src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java =================================================================== --- src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (revision 920046) +++ src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (working copy) @@ -95,9 +95,6 @@ public void abort() {} - // nocommit -- should be @ thread level not field - private final BytesRef flushTerm = new BytesRef(); - /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to @@ -109,6 +106,8 @@ final int numPostings = termsHashPerField.numPostings; + final BytesRef flushTerm = perThread.flushTerm; + assert numPostings >= 0; if (!doVectors || numPostings == 0) @@ -128,8 +127,8 @@ perThread.doc.addField(termsHashPerField.fieldInfo.number); - // nocommit -- should I sort by whatever terms dict is - // sorting by? + // TODO: we may want to make this sort in same order + // as Codec's terms dict? final RawPostingList[] postings = termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUTF16Comparator()); tvf.writeVInt(numPostings); Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 920046) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -614,10 +614,7 @@ // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: - // nocommit: old api - sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor, null); - // nocommit -- if info is from external dir DO NOT - // cache it! + sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor, codecs); readerMap.put(info, sr); } else { if (doOpenStores) { @@ -978,7 +975,6 @@ * false or if there is any other low-level * IO error */ - // nocommit -- need IW.Config!! public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit, Codecs codecs) throws CorruptIndexException, LockObtainFailedException, IOException { init(d, a, create, deletionPolicy, mfl.getLimit(), indexingChain, commit, codecs); @@ -3867,10 +3863,7 @@ } } - //nocommit: is this supposed to be here or not? 
- //merge.info.setHasProx(merger.hasProx()); - // mxx - // System.out.println(Thread.currentThread().getName() + ": finish setHasProx=" + merger.hasProx() + " seg=" + merge.info.name); + merge.info.setHasProx(merger.hasProx()); segmentInfos.subList(start, start + merge.segments.size()).clear(); assert !segmentInfos.contains(merge.info); @@ -4375,8 +4368,7 @@ } // This was a private clone and we had the // only reference - // nocommit -- why commented out? - // assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount(); + assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount(); } } } else { @@ -4388,8 +4380,7 @@ if (merge.readersClone[i] != null) { merge.readersClone[i].close(); // This was a private clone and we had the only reference - // nocommit -- why commented out? - //assert merge.readersClone[i].getRefCount() == 0; + assert merge.readersClone[i].getRefCount() == 0; } } } Index: src/java/org/apache/lucene/index/Fields.java =================================================================== --- src/java/org/apache/lucene/index/Fields.java (revision 920046) +++ src/java/org/apache/lucene/index/Fields.java (working copy) @@ -19,18 +19,18 @@ import java.io.IOException; -/** Access to fields and terms +/** Flex API for access to fields and terms * @lucene.experimental */ public abstract class Fields { - public final static Fields[] EMPTY_ARRAY = new Fields[0]; - /** Returns an iterator that will step through all field - * names */ + * names. This will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field */ + /** Get the {@link Terms} for this field. This may return + * null if the field does not exist.
*/ public abstract Terms terms(String field) throws IOException; + + public final static Fields[] EMPTY_ARRAY = new Fields[0]; } - Index: src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- src/java/org/apache/lucene/index/SegmentWriteState.java (revision 920046) +++ src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -31,6 +31,9 @@ * @lucene.experimental */ public class SegmentWriteState { + // nocommit -- not clean that this is here; sometimes we + // write a newly flushed segment; other times a merged + // segment (and this is null): DocumentsWriter docWriter; public Directory directory; public String segmentName; Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (working copy) @@ -53,7 +53,7 @@ // Starts a new term FieldInfo fieldInfo; - // nocommit + // nocommit -- debugging String desc; /** @lucene.experimental */ @@ -159,7 +159,7 @@ } @Override - public void addDoc(int docID, int termDocFreq) throws IOException { + public void startDoc(int docID, int termDocFreq) throws IOException { assert docID >= 0: "got docID=" + docID; @@ -183,7 +183,7 @@ if (Codec.DEBUG) System.out.println(" docID=" + doc.docID); - wrappedPostingsWriter.addDoc(doc.docID, doc.termDocFreq); + wrappedPostingsWriter.startDoc(doc.docID, doc.termDocFreq); if (!omitTF) { assert doc.termDocFreq == doc.numPositions; @@ -208,7 +208,7 @@ if (pulsed) { // We've already seen too many docs for this term -- // just forward to our fallback writer - wrappedPostingsWriter.addDoc(docID, termDocFreq); + wrappedPostingsWriter.startDoc(docID, termDocFreq); } else { currentDoc = pendingDocs[pendingDocCount++]; currentDoc.docID = docID; @@ -255,6 +255,8 @@ @Override public void finishTerm(int docCount, boolean isIndexTerm) throws IOException { + assert docCount > 0; + if (Codec.DEBUG) { System.out.println("PW: finishTerm pendingDocCount=" + pendingDocCount); } Index: src/java/org/apache/lucene/index/codecs/TermsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/TermsConsumer.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/TermsConsumer.java (working copy) @@ -31,10 +31,12 @@ public abstract class TermsConsumer { - /** Starts a new term in this field. */ + /** Starts a new term in this field; this may be called + * with no corresponding call to finish if the term had + * no docs. */ public abstract PostingsConsumer startTerm(BytesRef text) throws IOException; - /** Finishes the current term */ + /** Finishes the current term; numDocs must be > 0. 
*/ public abstract void finishTerm(BytesRef text, int numDocs) throws IOException; /** Called when we are done adding terms to this field */ @@ -51,6 +53,7 @@ public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException { BytesRef term; + assert termsEnum != null; if (mergeState.fieldInfo.omitTermFreqAndPositions) { if (docsEnum == null) { @@ -61,13 +64,14 @@ MultiDocsEnum docsEnumIn = null; while((term = termsEnum.next()) != null) { - MultiDocsEnum docsEnumIn2 = (MultiDocsEnum) termsEnum.docs(mergeState.multiDeletedDocs, docsEnumIn); - if (docsEnumIn2 != null) { - docsEnumIn = docsEnumIn2; + docsEnumIn = (MultiDocsEnum) termsEnum.docs(mergeState.multiDeletedDocs, docsEnumIn); + if (docsEnumIn != null) { docsEnum.reset(docsEnumIn); final PostingsConsumer postingsConsumer = startTerm(term); final int numDocs = postingsConsumer.merge(mergeState, docsEnum); - finishTerm(term, numDocs); + if (numDocs > 0) { + finishTerm(term, numDocs); + } } } } else { @@ -77,13 +81,14 @@ postingsEnum.setMergeState(mergeState); MultiDocsAndPositionsEnum postingsEnumIn = null; while((term = termsEnum.next()) != null) { - MultiDocsAndPositionsEnum postingsEnumIn2 = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiDeletedDocs, postingsEnumIn); - if (postingsEnumIn2 != null) { - postingsEnumIn = postingsEnumIn2; + postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(mergeState.multiDeletedDocs, postingsEnumIn); + if (postingsEnumIn != null) { postingsEnum.reset(postingsEnumIn); final PostingsConsumer postingsConsumer = startTerm(term); final int numDocs = postingsConsumer.merge(mergeState, postingsEnum); - finishTerm(term, numDocs); + if (numDocs > 0) { + finishTerm(term, numDocs); + } } } } Index: src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (working copy) @@ -39,7 +39,7 @@ // TODO: -- can we simplify this? 
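Back to the TermsConsumer.merge rework above: the contract is now that startTerm may be called for a term whose docs were all deleted, while finishTerm is only called when numDocs > 0. Below is an editorial sketch of a caller honoring that contract, mirroring the docs-only branch of merge; copyTerms is a hypothetical helper, and a field with positions would additionally have to forward each position before moving to the next doc:

    import java.io.IOException;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.index.codecs.PostingsConsumer;
    import org.apache.lucene.index.codecs.TermsConsumer;
    import org.apache.lucene.util.BytesRef;

    public class TermsCopier {
      // Feeds every term of termsEnum to the consumer; a term whose docs
      // were all deleted (numDocs == 0) gets startTerm but no finishTerm.
      public static void copyTerms(TermsEnum termsEnum, TermsConsumer consumer)
          throws IOException {
        BytesRef term;
        DocsEnum docs = null;
        while ((term = termsEnum.next()) != null) {
          final PostingsConsumer postings = consumer.startTerm(term);
          docs = termsEnum.docs(null, docs); // null skipDocs: keep all docs
          int numDocs = 0;
          int docID;
          while ((docID = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            postings.startDoc(docID, docs.freq());
            numDocs++;
          }
          if (numDocs > 0) {
            consumer.finishTerm(term, numDocs);
          }
        }
      }
    }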
public abstract static class Index { - // nocommit + // nocommit -- debugging public String desc; public abstract void read(IndexInput indexIn, boolean absolute) throws IOException; @@ -75,7 +75,7 @@ return bulkResult; } - // nocommit + // nocommit -- debugging public abstract String descFilePointer() throws IOException; } } Index: src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (working copy) @@ -39,7 +39,7 @@ public abstract static class Index { - // nocommit + // nocommit -- debugging public String desc; /** Internally records the current location */ @@ -59,6 +59,6 @@ public abstract void close() throws IOException; - // nocommit + // nocommit -- debugging public abstract String descFilePointer() throws IOException; } Index: src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy) @@ -352,7 +352,7 @@ } } - // nocommit + // nocommit -- debugging if (Codec.DEBUG) { System.out.println(" return doc=" + doc); } @@ -594,7 +594,7 @@ } } - // nocommit + // nocommit -- debugging if (Codec.DEBUG) { System.out.println(" return doc=" + doc); } Index: src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (working copy) @@ -164,7 +164,7 @@ /** Starts a new doc in this term. */ @Override - public void addDoc(int docID, int termDocFreq) throws IOException { + public void startDoc(int docID, int termDocFreq) throws IOException { final int delta = docID - lastDocID; @@ -260,6 +260,7 @@ long skipPos = skipOut.getFilePointer(); // TODO: -- wasteful we are counting this in two places? + assert docCount > 0; assert docCount == df; if (Codec.DEBUG) { System.out.println("dw.finishTerm termsFP=" + termsOut.getFilePointer() + " df=" + df + " skipPos=" + skipPos); } @@ -302,7 +303,7 @@ lastDocID = 0; df = 0; - // nocommit + // nocommit -- debugging count = 0; } Index: src/java/org/apache/lucene/index/codecs/Codec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/Codec.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/Codec.java (working copy) @@ -56,9 +56,7 @@ /** Reads a segment. NOTE: by the time this call * returns, it must hold open any files it will need to * use; else, those files may be deleted. */ - // nocommit -- add a "required capabilities" here; this - // way merging could say only "TERMS_LINEAR_SCAN" but - // searching would say "TERMS_RANDOM_ACCESS"? + // nocommit -- make a SegmentReadState (symmetric)?
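Expanding the nocommit above: a read-side state object symmetric to SegmentWriteState would presumably just bundle the parameters that fieldsProducer already takes (next line). A hypothetical shape, not part of this patch; the class name and fields are assumptions:

    import org.apache.lucene.index.FieldInfos;
    import org.apache.lucene.index.SegmentInfo;
    import org.apache.lucene.store.Directory;

    // Sketch only: mirrors fieldsProducer's parameter list.
    public class SegmentReadState {
      public final Directory dir;
      public final FieldInfos fieldInfos;
      public final SegmentInfo segmentInfo;
      public final int readBufferSize;
      public final int termsIndexDivisor;

      public SegmentReadState(Directory dir, FieldInfos fieldInfos,
                              SegmentInfo segmentInfo, int readBufferSize,
                              int termsIndexDivisor) {
        this.dir = dir;
        this.fieldInfos = fieldInfos;
        this.segmentInfo = segmentInfo;
        this.readBufferSize = readBufferSize;
        this.termsIndexDivisor = termsIndexDivisor;
      }
    }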
public abstract FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException; /** Gathers files associated with this segment */ Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (working copy) @@ -522,12 +522,6 @@ // a "how many terms until next index entry" in each // index entry, but that'd require some tricky // lookahead work when writing the index - - // nocommit -- this call to isIndexTerm is not - // right, when indexDivisor > 1? ie, this will - // return false for entries that actually are index - // terms, and then the postings impl will read the - // wrong offset. make a test... postingsReader.readTerm(in, fieldInfo, state, indexReader.isIndexTerm(1+state.ord, state.docFreq, false)); @@ -552,6 +546,7 @@ System.out.println("stdr.docs"); } DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + assert docsEnum != null; if (Codec.DEBUG) { docsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); } @@ -567,14 +562,12 @@ return null; } else { DocsAndPositionsEnum postingsEnum = postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); - if (Codec.DEBUG) { - if (postingsEnum != null) { + if (postingsEnum != null) { + if (Codec.DEBUG) { postingsEnum.desc = fieldInfo.name + ":" + bytesReader.term.utf8ToString(); + Codec.debug(" return enum=" + postingsEnum); } } - if (Codec.DEBUG) { - Codec.debug(" return enum=" + postingsEnum); - } return postingsEnum; } } Index: src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java (working copy) @@ -37,8 +37,7 @@ private long[] lastSkipProxPointer; private IndexOutput freqOutput; - // nocommit -- private again - public IndexOutput proxOutput; + private IndexOutput proxOutput; private int curDoc; private boolean curStorePayloads; @@ -57,16 +56,6 @@ lastSkipProxPointer = new long[numberOfSkipLevels]; } - // nocommit -- made public - //public void setFreqOutput(IndexOutput freqOutput) { - //this.freqOutput = freqOutput; - //} - - // nocommit -- made public - //public void setProxOutput(IndexOutput proxOutput) { - //this.proxOutput = proxOutput; - //} - /** * Sets the values for the current skip data. */ Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java (working copy) @@ -105,9 +105,6 @@ skipListWriter.resetSkip(); } - // nocommit -- should we NOT reuse across fields? would - // be cleaner - // Currently, this instance is re-used across fields, so // our parent calls setField whenever the field changes @Override @@ -125,7 +122,7 @@ /** Starts a new doc in this term.
*/ @Override - public void addDoc(int docID, int termDocFreq) throws IOException { + public void startDoc(int docID, int termDocFreq) throws IOException { final int delta = docID - lastDocID; @@ -140,9 +137,6 @@ if ((++df % skipInterval) == 0) { skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength); skipListWriter.bufferSkip(df); - if (Codec.DEBUG) { - System.out.println(" bufferSkip lastDocID=" + lastDocID + " df=" + df + " freqFP=" + freqOut.getFilePointer() + " proxFP=" + skipListWriter.proxOutput.getFilePointer()); - } } assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs; @@ -212,8 +206,12 @@ /** Called when we are done adding docs to this term */ @Override public void finishTerm(int docCount, boolean isIndexTerm) throws IOException { - // nocommit -- wasteful we are counting this in two places? + assert docCount > 0; + + // TODO: wasteful we are counting this (counting # docs + // for this term) in two places? assert docCount == df; + // mxx if (Codec.DEBUG) { Codec.debug("dw.finishTerm termsOut.fp=" + termsOut.getFilePointer() + " freqStart=" + freqStart + " df=" + df + " isIndex?=" + isIndexTerm); @@ -251,7 +249,7 @@ lastDocID = 0; df = 0; - // nocommit + // nocommit -- debugging count = 0; } Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (working copy) @@ -182,23 +182,23 @@ @Override public void finishTerm(BytesRef text, int numDocs) throws IOException { + assert numDocs > 0; + if (Codec.DEBUG) { Codec.debug("finishTerm seg=" + segment + " text=" + fieldInfo.name + ":" + text.utf8ToString() + " numDocs=" + numDocs + " numTerms=" + numTerms); } - if (numDocs > 0) { - final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs); + final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs); - if (Codec.DEBUG) { - Codec.debug(" tis.fp=" + out.getFilePointer() + " isIndexTerm?=" + isIndexTerm); - System.out.println(" term bytes=" + text.utf8ToString()); - } - termWriter.write(text); - out.writeVInt(numDocs); + if (Codec.DEBUG) { + Codec.debug(" tis.fp=" + out.getFilePointer() + " isIndexTerm?=" + isIndexTerm); + System.out.println(" term bytes=" + text.utf8ToString()); + } + termWriter.write(text); + out.writeVInt(numDocs); - postingsWriter.finishTerm(numDocs, isIndexTerm); - numTerms++; - } + postingsWriter.finishTerm(numDocs, isIndexTerm); + numTerms++; } // Finishes all terms in this field Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (working copy) @@ -35,10 +35,6 @@ * postings format. * @lucene.experimental */ -// nocommit -- should we switch "hasProx" higher up? and -// create two separate docs readers, one that also reads -// prox and one that doesn't? 
- public class StandardPostingsReaderImpl extends StandardPostingsReader { private final IndexInput freqIn; @@ -227,10 +223,9 @@ freqOffset = termState.freqOffset; skipOffset = termState.skipOffset; - // nocommit this seek frequently isn't needed, when - // we enum terms and all docs for each term (MTQ, - // or, merging). is this seek costing us anything? - // we should avoid it so... + // TODO: for full enum case (eg segment merging) this + // seek is unnecessary; maybe we can avoid in such + // cases freqIn.seek(termState.freqOffset); limit = termState.docFreq; ord = 0; @@ -447,10 +442,9 @@ this.skipDocs = skipDocs; - // nocommit this seek frequently isn't needed, when - // we enum terms and all docs for each term (MTQ, - // or, merging). is this seek costing us anything? - // we should avoid it so... + // TODO: for full enum case (eg segment merging) this + // seek is unnecessary; maybe we can avoid in such + // cases freqIn.seek(termState.freqOffset); lazyProxPointer = termState.proxOffset; Index: src/java/org/apache/lucene/index/codecs/FieldsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (working copy) @@ -20,7 +20,6 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; -import org.apache.lucene.index.TermsEnum; import java.io.IOException; @@ -41,14 +40,12 @@ public void merge(MergeState mergeState, Fields fields) throws IOException { FieldsEnum fieldsEnum = fields.iterator(); + assert fieldsEnum != null; String field; while((field = fieldsEnum.next()) != null) { mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); - final TermsEnum termsEnum = fieldsEnum.terms(); - if (termsEnum != null) { - termsConsumer.merge(mergeState, termsEnum); - } + termsConsumer.merge(mergeState, fieldsEnum.terms()); } } } Index: src/java/org/apache/lucene/index/codecs/PostingsConsumer.java =================================================================== --- src/java/org/apache/lucene/index/codecs/PostingsConsumer.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/PostingsConsumer.java (working copy) @@ -29,14 +29,13 @@ public abstract class PostingsConsumer { - // nocommit + // nocommit -- debugging public String desc; - // nocommit -- rename to startDoc? /** Starts a new doc in this term; any positions for * this doc then follow.
*/ - public abstract void addDoc(int docID, int termDocFreq) throws IOException; + public abstract void startDoc(int docID, int termDocFreq) throws IOException; public static class PostingsMergeState { DocsEnum docsEnum; @@ -67,7 +66,7 @@ if (doc == DocsAndPositionsEnum.NO_MORE_DOCS) { break; } - addDoc(doc, postings.freq()); + startDoc(doc, postings.freq()); df++; } } else { @@ -78,7 +77,7 @@ break; } final int freq = postingsEnum.freq(); - addDoc(doc, freq); + startDoc(doc, freq); for(int i=0;i>> 1; // shift off low bit if ((docCode & 1) != 0) // if low bit is set freq = 1; // freq is one - else + else { freq = freqStream.readVInt(); // else read freq + assert freq != 1; + } } count++; Index: src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (working copy) @@ -206,9 +206,6 @@ } } - // nocommit -- make sure these optimizations survive - // into flex - // optimize sequential access: first try scanning cached enum w/o seeking SegmentTermEnum enumerator = resources.termEnum; if (enumerator.term() != null // term is at or past current Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 920046) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -348,8 +348,6 @@ @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { - // nocommit -- must assert that skipDocs "matches" the - // underlying deletedDocs? if (reuse != null) { return ((PreDocsEnum) reuse).reset(termEnum, skipDocs); } else { @@ -359,8 +357,6 @@ @Override public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { - // nocommit -- must assert that skipDocs "matches" the - // underlying deletedDocs? if (reuse != null) { return ((PreDocsAndPositionsEnum) reuse).reset(termEnum, skipDocs); } else { Index: src/java/org/apache/lucene/index/MultiFields.java =================================================================== --- src/java/org/apache/lucene/index/MultiFields.java (revision 920046) +++ src/java/org/apache/lucene/index/MultiFields.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.MultiBits; /** * Exposes flex API, merged from flex API of sub-segments. @@ -45,13 +45,24 @@ public final class MultiFields extends Fields { private final Fields[] subs; private final ReaderUtil.Slice[] subSlices; - private final Map terms = new HashMap(); + private final Map terms = new HashMap(); + /** Returns a single {@link Fields} instance for this + * reader, merging fields/terms/docs/positions on the + * fly. This may return null if the reader has no + * fields. + * + *
NOTE
: this is a slow way to access postings. + * It's better to get the sub-readers (using {@link + * ReaderUtil#Gather}) and iterate through them + * yourself. */ public static Fields getFields(IndexReader r) throws IOException { final IndexReader[] subs = r.getSequentialSubReaders(); if (subs == null) { // already an atomic reader return r.fields(); + } else if (subs.length == 0) { + // no fields + return null; } else if (subs.length == 1) { return getFields(subs[0]); } else { @@ -61,8 +72,15 @@ final List fields = new ArrayList(); final List slices = new ArrayList(); - ReaderUtil.gatherSubFields(null, fields, slices, r, 0); + new ReaderUtil.Gather(r) { + @Override + protected void add(int base, IndexReader r) throws IOException { + fields.add(r.fields()); + slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1)); + } + }.run(); + if (fields.size() == 0) { return null; } else if (fields.size() == 1) { @@ -77,66 +95,81 @@ } } + public static Bits getDeletedDocs(IndexReader r) throws IOException { + Bits result; + if (r.hasDeletions()) { + + result = r.retrieveDelDocs(); + if (result == null) { + + final List bits = new ArrayList(); + final List starts = new ArrayList(); + final List nulls = new ArrayList(); + + final int maxDoc = new ReaderUtil.Gather(r) { + @Override + protected void add(int base, IndexReader r) throws IOException { + // record all delDocs, even if they are null + bits.add(r.getDeletedDocs()); + starts.add(base); + } + }.run(); + starts.add(maxDoc); + + assert bits.size() > 0; + if (bits.size() == 1) { + // Only one actual sub reader -- optimize this case + result = bits.get(0); + } else { + result = new MultiBits(bits, starts); + } + r.storeDelDocs(result); + } + } else { + result = null; + } + + return result; + } + + /** This method may return null if the field does not exist.*/ public static Terms getTerms(IndexReader r, String field) throws IOException { final Fields fields = getFields(r); - if (fields != null) { + if (fields == null) { + return null; + } else { return fields.terms(field); - } else { - return null; } } /** Returns {@link DocsEnum} for the specified field & - * term. This may return null, for example if either the - * field or term does not exist. */ + * term. This may return null if the term does not + * exist. */ public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; - final Fields fields = getFields(r); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("mf.termDocsEnum field=" + field + " term=" + term + " terms=" + terms); - } - final DocsEnum docs = terms.docs(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("mf.termDocsEnum field=" + field + " docs=" +docs); - } - return docs; - } + final Terms terms = getTerms(r, field); + if (terms != null) { + return terms.docs(skipDocs, term, null); + } else { + return null; } - - return null; } /** Returns {@link DocsAndPositionsEnum} for the specified - * field & term. This may return null, for example if - * either the field or term does not exist. */ + * field & term. This may return null if the term does + * not exist or positions were not indexed. 
*/ public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException { assert field != null; assert term != null; - - final Fields fields = getFields(r); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("mf.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms); - } - final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("mf.termPositionsEnum field=" + field + " postings=" +postings); - } - return postings; - } + final Terms terms = getTerms(r, field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, null); + } else { + return null; } - - return null; } - public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) { this.subs = subs; this.subSlices = subSlices; @@ -148,14 +181,11 @@ final List fieldsEnums = new ArrayList(); final List fieldsSlices = new ArrayList(); for(int i=0;i allSubReaders, IndexReader reader) { - IndexReader[] subReaders = reader.getSequentialSubReaders(); - if (subReaders == null) { - // Add the reader itself, and do not recurse - allSubReaders.add(reader); - } else { - for (int i = 0; i < subReaders.length; i++) { - gatherSubReaders(allSubReaders, subReaders[i]); - } + + public static void gatherSubReaders(final List allSubReaders, IndexReader reader) { + try { + new Gather(reader) { + @Override + protected void add(int base, IndexReader r) { + allSubReaders.add(r); + } + }.run(); + } catch (IOException ioe) { + // won't happen + throw new RuntimeException(ioe); } } - public static int gatherSubFields(List readers, List fields, List slices, IndexReader reader, int base) throws IOException { - IndexReader[] subReaders = reader.getSequentialSubReaders(); - if (subReaders == null) { - // Add the reader's fields - if (readers != null) { - readers.add(reader); + public static abstract class Gather { + private final IndexReader topReader; + + public Gather(IndexReader r) { + topReader = r; + } + + public int run() throws IOException { + return run(0, topReader); + } + + public int run(int docBase) throws IOException { + return run(docBase, topReader); + } + + private int run(int base, IndexReader reader) throws IOException { + IndexReader[] subReaders = reader.getSequentialSubReaders(); + if (subReaders == null) { + // atomic reader + add(base, reader); + base += reader.maxDoc(); + } else { + // composite reader + for (int i = 0; i < subReaders.length; i++) { + base = run(base, subReaders[i]); + } } - fields.add(reader.fields()); - slices.add(new Slice(base, reader.maxDoc(), fields.size()-1)); - base += reader.maxDoc(); - } else { - for (int i = 0; i < subReaders.length; i++) { - base = gatherSubFields(readers, fields, slices, subReaders[i], base); - } + + return base; } - return base; + protected abstract void add(int base, IndexReader r) throws IOException; } /** Index: src/java/org/apache/lucene/util/Bits.java =================================================================== --- src/java/org/apache/lucene/util/Bits.java (revision 920046) +++ src/java/org/apache/lucene/util/Bits.java (working copy) @@ -17,7 +17,13 @@ * limitations under the License. 
*/ +/** + * @lucene.experimental + */ + public interface Bits { public boolean get(int index); public int length(); + + public static final Bits[] EMPTY_ARRAY = new Bits[0]; } Index: src/java/org/apache/lucene/util/MultiBits.java =================================================================== --- src/java/org/apache/lucene/util/MultiBits.java (revision 920046) +++ src/java/org/apache/lucene/util/MultiBits.java (working copy) @@ -17,21 +17,30 @@ * limitations under the License. */ +import java.util.List; + /** * Concatenates multiple Bits together, on every lookup. * *

NOTE: This is very costly, as every lookup must * do a binary search to locate the right sub-reader. + * + * @lucene.experimental */ + public final class MultiBits implements Bits { private final Bits[] subs; // length is 1+subs.length (the last entry has the maxDoc): private final int[] starts; - public MultiBits(Bits[] subs, int[] starts) { - this.subs = subs; - this.starts = starts; + public MultiBits(List bits, List starts) { + assert starts.size() == 1+bits.size(); + this.subs = bits.toArray(Bits.EMPTY_ARRAY); + this.starts = new int[starts.size()]; + for(int i=0;i 0) { - sb.append(' '); - } - sb.append(asUnicodeChar(s.charAt(i))); - } - sb.append(']'); - return sb.toString(); - } - public void copy(BytesRef other) { if (bytes == null) { bytes = new byte[other.length]; Index: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 920046) +++ contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -478,7 +478,7 @@ TermsEnum terms = fields.terms(); DocsEnum docs = null; while(terms.next() != null) { - docs = terms.docs(reader.getDeletedDocs(), docs); + docs = terms.docs(MultiFields.getDeletedDocs(reader), docs); while(docs.nextDoc() != docs.NO_MORE_DOCS) { totalTokenCount2 += docs.freq(); } Index: contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java =================================================================== --- contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java (revision 920046) +++ contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java (working copy) @@ -73,8 +73,6 @@ for (int i = 0; i < ir.maxDoc(); i++) { - // nocommit - /* mapper = new ParallelArrayTermVectorMapper(); accessor.accept(ir, i, "a", mapper); tfv = mapper.materializeVector(); @@ -94,7 +92,6 @@ assertEquals("doc " + i, 8, tfv.getTermFrequencies().length); assertEquals("doc " + i, "c", tfv.getTerms()[2]); assertEquals("doc " + i, 7, tfv.getTermFrequencies()[2]); - */ mapper = new ParallelArrayTermVectorMapper(); accessor.accept(ir, i, "q", mapper); Index: contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (revision 920046) +++ contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (working copy) @@ -202,6 +202,8 @@ if (oldDels != null) { dels.or(oldDels); } + // nocommit -- not good that this class has to do this... 
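Returning to MultiBits further up: the starts list carries each sub's doc base plus the total length as its final entry, and get translates a global doc to the owning sub. An editorial demo with two hand-rolled Bits standing in for per-segment deleted docs; the expected output is noted in comments:

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.MultiBits;

    public class MultiBitsDemo {
      public static void main(String[] args) {
        // First sub covers global docs 0..2, second covers 3..4.
        final Bits first = new Bits() {
          public boolean get(int index) { return index == 1; }
          public int length() { return 3; }
        };
        final Bits second = new Bits() {
          public boolean get(int index) { return index == 0; }
          public int length() { return 2; }
        };
        final List<Bits> bits = new ArrayList<Bits>();
        bits.add(first);
        bits.add(second);
        final List<Integer> starts = new ArrayList<Integer>();
        starts.add(0);  // doc base of first sub
        starts.add(3);  // doc base of second sub
        starts.add(5);  // total length goes last
        final Bits merged = new MultiBits(bits, starts);
        System.out.println(merged.get(1)); // true  (first sub, local doc 1)
        System.out.println(merged.get(3)); // true  (second sub, local doc 0)
        System.out.println(merged.get(4)); // false
      }
    }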
+ storeDelDocs(null); } @Override Index: contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (revision 920046) +++ contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (working copy) @@ -100,7 +100,7 @@ positions.clear(); } - final Bits delDocs = indexReader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(indexReader); Terms terms = MultiFields.getTerms(indexReader, field); boolean anyTerms = false; Index: contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (revision 920046) +++ contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.Bits; @@ -83,7 +84,7 @@ private OpenBitSet correctBits(IndexReader reader) throws IOException { OpenBitSet bits = new OpenBitSet(reader.maxDoc()); //assume all are INvalid - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); Terms terms = reader.fields().terms(fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator(); @@ -121,7 +122,7 @@ OpenBitSet bits=new OpenBitSet(reader.maxDoc()); bits.set(0,reader.maxDoc()); //assume all are valid - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); Terms terms = reader.fields().terms(fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator();
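The contrib changes in this hunk and the ones above all follow the same recipe: deletions come from MultiFields.getDeletedDocs rather than the reader, and postings are walked with the flex TermsEnum/DocsEnum. A self-contained editorial sketch of that recipe; countPostings is a hypothetical helper, not part of the patch:

    import java.io.IOException;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.Bits;

    public class PostingsCounter {
      // Counts live postings of a field, skipping deleted docs the same
      // way DuplicateFilter and TermVectorAccessor now do.
      public static long countPostings(IndexReader reader, String field)
          throws IOException {
        final Bits delDocs = MultiFields.getDeletedDocs(reader);
        final Terms terms = MultiFields.getTerms(reader, field);
        if (terms == null) {
          return 0; // field does not exist
        }
        long count = 0;
        final TermsEnum termsEnum = terms.iterator();
        DocsEnum docs = null;
        while (termsEnum.next() != null) {
          docs = termsEnum.docs(delDocs, docs);
          while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
            count++;
          }
        }
        return count;
      }
    }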