Index: src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- src/test/org/apache/lucene/TestExternalCodecs.java (revision 921577) +++ src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -460,9 +460,9 @@ } @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) + public FieldsProducer fieldsProducer(SegmentReadState readState) throws IOException { - return state.get(si.name); + return state.get(readState.segmentInfo.name); } @Override @@ -565,7 +565,7 @@ fields.add(fi.name); Codec codec = getCodec(fi.name); if (!codecs.containsKey(codec)) { - codecs.put(codec, codec.fieldsProducer(dir, fieldInfos, si, readBufferSize, indexDivisor)); + codecs.put(codec, codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, readBufferSize, indexDivisor))); } } } @@ -644,11 +644,9 @@ } } - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, - SegmentInfo si, int readBufferSize, - int indexDivisor) - throws IOException { - return new FieldsReader(dir, fieldInfos, si, readBufferSize, indexDivisor); + public FieldsProducer fieldsProducer(SegmentReadState state) + throws IOException { + return new FieldsReader(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor); } @Override @@ -674,7 +672,7 @@ } } - public static class MyCodecs extends Codecs { + public static class MyCodecs extends CodecProvider { PerFieldCodecWrapper perField; MyCodecs() { @@ -739,9 +737,9 @@ } @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException { + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - StandardPostingsReader docsReader = new StandardPostingsReaderImpl(dir, si, readBufferSize); + StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize); StandardPostingsReader pulsingReader = new PulsingPostingsReaderImpl(docsReader); // Terms dict index reader @@ -749,10 +747,10 @@ boolean success = false; try { - indexReader = new SimpleStandardTermsIndexReader(dir, - fieldInfos, - si.name, - indexDivisor, + indexReader = new SimpleStandardTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, reverseUnicodeComparator); success = true; } finally { @@ -765,9 +763,11 @@ success = false; try { FieldsProducer ret = new StandardTermsDictReader(indexReader, - dir, fieldInfos, si.name, + state.dir, + state.fieldInfos, + state.segmentInfo.name, pulsingReader, - readBufferSize, + state.readBufferSize, reverseUnicodeComparator, StandardCodec.TERMS_CACHE_SIZE); success = true; Index: src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- src/test/org/apache/lucene/index/TestDoc.java (revision 921577) +++ src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -35,7 +35,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; /** JUnit adaptation of an older test case DocTest. 
*/
@@ -181,7 +181,7 @@
     SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
     SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
-    SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL, merged, null, Codecs.getDefault());
+    SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL, merged, null, CodecProvider.getDefault());
     merger.add(r1);
     merger.add(r2);
Index: src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexReader.java (revision 921577)
+++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -42,7 +42,7 @@
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.IndexReader.FieldOption;
-import org.apache.lucene.index.codecs.Codecs;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
@@ -1410,7 +1410,7 @@
     writer.close();
     SegmentInfos sis = new SegmentInfos();
-    sis.read(d, Codecs.getDefault());
+    sis.read(d, CodecProvider.getDefault());
     IndexReader r = IndexReader.open(d, false);
     IndexCommit c = r.getIndexCommit();
Index: src/test/org/apache/lucene/index/TestStressIndexing2.java
===================================================================
--- src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 921577)
+++ src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy)
@@ -17,10 +17,8 @@
 import org.apache.lucene.store.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.analysis.*;
+import org.apache.lucene.util.*;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.search.TermQuery;
 import java.util.*;
@@ -28,8 +26,6 @@
 import junit.framework.Assert;
-// nocommit -- cut test over to flex API, but not too soon
-// (it catches bugs in emulation)
 public class TestStressIndexing2 extends LuceneTestCase {
   static int maxFields=4;
   static int bigFieldSize=10;
@@ -278,32 +274,53 @@
     int[] r2r1 = new int[r2.maxDoc()];   // r2 id to r1 id mapping
-    TermDocs termDocs1 = r1.termDocs();
-    TermDocs termDocs2 = r2.termDocs();
-
     // create mapping from id2 space to id2 based on idField
     idField = StringHelper.intern(idField);
-    TermEnum termEnum = r1.terms (new Term (idField, ""));
-    do {
-      Term term = termEnum.term();
+    final TermsEnum termsEnum = MultiFields.getFields(r1).terms(idField).iterator();
+
+    final Bits delDocs1 = MultiFields.getDeletedDocs(r1);
+    final Bits delDocs2 = MultiFields.getDeletedDocs(r2);
+
+    Fields fields = MultiFields.getFields(r2);
+    if (fields == null) {
+      // make sure r1 is in fact empty (eg has only all
+      // deleted docs):
+      DocsEnum docs = null;
+      while(termsEnum.next() != null) {
+        docs = termsEnum.docs(delDocs1, docs);
+        while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+          fail("r1 is not empty but r2 is");
+        }
+      }
+      return;
+    }
+    Terms terms2 = fields.terms(idField);
+
+    DocsEnum termDocs1 = null;
+    DocsEnum termDocs2 = null;
+
+    while(true) {
+      BytesRef term = termsEnum.next();
      //System.out.println("TEST: match id term=" + term);
-      if (term==null || term.field() != idField) break;
+      if (term == null) {
+        break;
+      }
-      termDocs1.seek (termEnum)
- if (!termDocs1.next()) { + termDocs1 = termsEnum.docs(delDocs1, termDocs1); + termDocs2 = terms2.docs(delDocs2, term, termDocs2); + + if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) { // This doc is deleted and wasn't replaced - termDocs2.seek(termEnum); - assertFalse(termDocs2.next()); + assertTrue(termDocs2 == null || termDocs2.nextDoc() == DocsEnum.NO_MORE_DOCS); continue; } - int id1 = termDocs1.doc(); - assertFalse(termDocs1.next()); + int id1 = termDocs1.docID(); + assertEquals(DocsEnum.NO_MORE_DOCS, termDocs1.nextDoc()); - termDocs2.seek(termEnum); - assertTrue(termDocs2.next()); - int id2 = termDocs2.doc(); - assertFalse(termDocs2.next()); + assertTrue(termDocs2.nextDoc() != DocsEnum.NO_MORE_DOCS); + int id2 = termDocs2.docID(); + assertEquals(DocsEnum.NO_MORE_DOCS, termDocs2.nextDoc()); r2r1[id2] = id1; @@ -337,65 +354,95 @@ throw e; } - } while (termEnum.next()); + } - termEnum.close(); //System.out.println("TEST: done match id"); // Verify postings //System.out.println("TEST: create te1"); - TermEnum termEnum1 = r1.terms (new Term ("", "")); - //System.out.println("TEST: create te2"); - TermEnum termEnum2 = r2.terms (new Term ("", "")); + final FieldsEnum fields1 = MultiFields.getFields(r1).iterator(); + final FieldsEnum fields2 = MultiFields.getFields(r2).iterator(); + String field1=null, field2=null; + TermsEnum termsEnum1 = null; + TermsEnum termsEnum2 = null; + DocsEnum docs1=null, docs2=null; + // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.numDocs()]; long[] info2 = new long[r2.numDocs()]; for(;;) { - Term term1,term2; + BytesRef term1=null, term2=null; // iterate until we get some docs int len1; for(;;) { len1=0; - term1 = termEnum1.term(); + if (termsEnum1 == null) { + field1 = fields1.next(); + if (field1 == null) { + break; + } else { + termsEnum1 = fields1.terms(); + } + } + term1 = termsEnum1.next(); + if (term1 == null) { + // no more terms in this field + termsEnum1 = null; + continue; + } + //System.out.println("TEST: term1=" + term1); - if (term1==null) break; - termDocs1.seek(termEnum1); - while (termDocs1.next()) { - int d1 = termDocs1.doc(); - int f1 = termDocs1.freq(); - info1[len1] = (((long)d1)<<32) | f1; + docs1 = termsEnum1.docs(delDocs1, docs1); + while (docs1.nextDoc() != DocsEnum.NO_MORE_DOCS) { + int d = docs1.docID(); + int f = docs1.freq(); + info1[len1] = (((long)d)<<32) | f; len1++; } if (len1>0) break; - if (!termEnum1.next()) break; } - // iterate until we get some docs + // iterate until we get some docs int len2; for(;;) { len2=0; - term2 = termEnum2.term(); - //System.out.println("TEST: term2=" + term2); - if (term2==null) break; - termDocs2.seek(termEnum2); - while (termDocs2.next()) { - int d2 = termDocs2.doc(); - int f2 = termDocs2.freq(); - info2[len2] = (((long)r2r1[d2])<<32) | f2; + if (termsEnum2 == null) { + field2 = fields2.next(); + if (field2 == null) { + break; + } else { + termsEnum2 = fields2.terms(); + } + } + term2 = termsEnum2.next(); + if (term2 == null) { + // no more terms in this field + termsEnum2 = null; + continue; + } + + //System.out.println("TEST: term1=" + term1); + docs2 = termsEnum2.docs(delDocs2, docs2); + while (docs2.nextDoc() != DocsEnum.NO_MORE_DOCS) { + int d = r2r1[docs2.docID()]; + int f = docs2.freq(); + info2[len2] = (((long)d)<<32) | f; len2++; } if (len2>0) break; - if (!termEnum2.next()) break; } assertEquals(len1, len2); if (len1==0) break; // no more terms + assertEquals(field1, field2); + assertTrue(term1.bytesEquals(term2)); + if (!hasDeletes) - 
assertEquals(termEnum1.docFreq(), termEnum2.docFreq()); + assertEquals(termsEnum1.docFreq(), termsEnum2.docFreq()); assertEquals("len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes, term1, term2); @@ -404,11 +451,11 @@ // now compare for (int i=0; i limit) { + if (result1.count > limit) { // copy down - System.arraycopy(docs1, limit, docs1, 0, count1-limit); - System.arraycopy(freqs1, limit, freqs1, 0, count1-limit); + // nocommit -- hmm in general I should + // not muck w/ the int[]'s returned to + // me like this...? + System.arraycopy(result1.docs.ints, limit, result1.docs.ints, 0, result1.count-limit); + System.arraycopy(result1.freqs.ints, limit, result1.freqs.ints, 0, result1.count-limit); } - count1 -= limit; + result1.count -= limit; if (count2 > limit) { // copy down Index: src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 921577) +++ src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ -131,8 +131,6 @@ if (termsEnum.next() != null) { // fill into a OpenBitSet final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - final int[] docs = new int[32]; - final int[] freqs = new int[32]; int termCount = 0; final Bits delDocs = MultiFields.getDeletedDocs(reader); DocsEnum docsEnum = null; @@ -142,9 +140,10 @@ // enumerator.term().toBytesString()); docsEnum = termsEnum.docs(delDocs, docsEnum); while (true) { - final int count = docsEnum.read(docs, freqs); - if (count != 0) { - for (int i = 0; i < count; i++) { + final DocsEnum.BulkReadResult result = docsEnum.read(); + if (result.count != 0) { + final int[] docs = result.docs.ints; + for (int i = 0; i < result.count; i++) { bitSet.set(docs[i]); } } else { Index: src/java/org/apache/lucene/search/TermScorer.java =================================================================== --- src/java/org/apache/lucene/search/TermScorer.java (revision 921577) +++ src/java/org/apache/lucene/search/TermScorer.java (working copy) @@ -29,14 +29,15 @@ private byte[] norms; private float weightValue; private int doc = -1; + private int freq; - private final int[] docs = new int[32]; // buffered doc numbers - private final int[] freqs = new int[32]; // buffered term freqs private int pointer; private int pointerMax; private static final int SCORE_CACHE_SIZE = 32; private float[] scoreCache = new float[SCORE_CACHE_SIZE]; + private int[] docs; + private int[] freqs; /** * Construct a TermScorer. @@ -68,6 +69,13 @@ score(c, Integer.MAX_VALUE, nextDoc()); } + private final void refillBuffer() throws IOException { + final DocsEnum.BulkReadResult result = docsEnum.read(); // refill + pointerMax = result.count; + docs = result.docs.ints; + freqs = result.freqs.ints; + } + // firstDocID is ignored since nextDoc() sets 'doc' @Override protected boolean score(Collector c, int end, int firstDocID) throws IOException { @@ -75,7 +83,7 @@ while (doc < end) { // for docs in window c.collect(doc); // collect score if (++pointer >= pointerMax) { - pointerMax = docsEnum.read(docs, freqs); // refill buffers + refillBuffer(); if (pointerMax != 0) { pointer = 0; } else { @@ -84,12 +92,15 @@ } } doc = docs[pointer]; + freq = freqs[pointer]; } return true; } @Override - public int docID() { return doc; } + public int docID() { + return doc; + } /** * Advances to the next document matching the query.
@@ -102,7 +113,7 @@ public int nextDoc() throws IOException { pointer++; if (pointer >= pointerMax) { - pointerMax = docsEnum.read(docs, freqs); // refill buffer + refillBuffer(); if (pointerMax != 0) { pointer = 0; } else { @@ -110,6 +121,7 @@ } } doc = docs[pointer]; + freq = freqs[pointer]; assert doc != NO_MORE_DOCS; return doc; } @@ -117,11 +129,10 @@ @Override public float score() { assert doc != NO_MORE_DOCS; - int f = freqs[pointer]; float raw = // compute tf(f)*weight - f < SCORE_CACHE_SIZE // check cache - ? scoreCache[f] // cache hit - : getSimilarity().tf(f)*weightValue; // cache miss + freq < SCORE_CACHE_SIZE // check cache + ? scoreCache[freq] // cache hit + : getSimilarity().tf(freq)*weightValue; // cache miss return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[doc]); // normalize for field } @@ -140,6 +151,7 @@ // first scan in cache for (pointer++; pointer < pointerMax; pointer++) { if (docs[pointer] >= target) { + freq = freqs[pointer]; return doc = docs[pointer]; } } @@ -148,10 +160,8 @@ int newDoc = docsEnum.advance(target); //System.out.println("ts.advance docsEnum=" + docsEnum); if (newDoc != DocsEnum.NO_MORE_DOCS) { - pointerMax = 1; - pointer = 0; - docs[pointer] = doc = newDoc; - freqs[pointer] = docsEnum.freq(); + doc = newDoc; + freq = docsEnum.freq(); } else { doc = NO_MORE_DOCS; } Index: src/java/org/apache/lucene/index/AllDocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/AllDocsEnum.java (revision 921577) +++ src/java/org/apache/lucene/index/AllDocsEnum.java (working copy) @@ -48,10 +48,12 @@ } @Override - public int read(int[] docs, int[] freqs) throws IOException { - final int length = docs.length; + public BulkReadResult read() throws IOException { + initBulkResult(); + final int[] docs = bulkResult.docs.ints; + final int[] freqs = bulkResult.freqs.ints; int i = 0; - while (i < length && doc < maxDoc) { + while (i < docs.length && doc < maxDoc) { if (skipDocs == null || !skipDocs.get(doc)) { docs[i] = doc; freqs[i] = 1; @@ -59,7 +61,8 @@ } doc++; } - return i; + bulkResult.count = i; + return bulkResult; } @Override Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 921577) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BitVector; import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import java.io.IOException; import java.util.List; import java.util.Map; @@ -169,7 +169,7 @@ * @param format format of the segments info file * @param input input handle to read segment info from */ - SegmentInfo(Directory dir, int format, IndexInput input, Codecs codecs) throws IOException { + SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException { this.dir = dir; name = input.readString(); docCount = input.readInt(); Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 921577) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -40,7 +40,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.CloseableThreadLocal; import 
org.apache.lucene.util.UnicodeUtil; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.preflex.PreFlexFields; import org.apache.lucene.index.codecs.preflex.SegmentTermDocs; @@ -95,7 +95,7 @@ final FieldsProducer fields; final boolean isPreFlex; - final Codecs codecs; + final CodecProvider codecs; final Directory dir; final Directory cfsDir; @@ -109,7 +109,7 @@ CompoundFileReader cfsReader; CompoundFileReader storeCFSReader; - CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor, Codecs codecs) throws IOException { + CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor, CodecProvider codecs) throws IOException { if (termsIndexDivisor < 1 && termsIndexDivisor != -1) { throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + termsIndexDivisor); @@ -120,7 +120,7 @@ System.out.println("sr: init core for segment=" + segment); } if (codecs == null) { - codecs = Codecs.getDefault(); + codecs = CodecProvider.getDefault(); } this.codecs = codecs; this.readBufferSize = readBufferSize; @@ -144,7 +144,7 @@ if (Codec.DEBUG) { System.out.println("sr.core.init: seg=" + si.name + " codec=" + si.getCodec()); } - fields = si.getCodec().fieldsProducer(cfsDir, fieldInfos, si, readBufferSize, termsIndexDivisor); + fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor)); assert fields != null; isPreFlex = fields instanceof PreFlexFields; @@ -517,10 +517,10 @@ int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor, - Codecs codecs) + CodecProvider codecs) throws CorruptIndexException, IOException { if (codecs == null) { - codecs = Codecs.getDefault(); + codecs = CodecProvider.getDefault(); } SegmentReader instance = readOnly ? 
new ReadOnlySegmentReader() : new SegmentReader(); @@ -1533,11 +1533,40 @@ } } + private DocsEnum.BulkReadResult pendingBulkResult; + private int pendingBulk; + public int read(int[] docs, int[] freqs) throws IOException { if (!any) { return 0; + } else if (pendingBulk > 0) { + final int left = pendingBulkResult.count - pendingBulk; + if (docs.length >= left) { + // read all pending + System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, left); + System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, left); + pendingBulk = 0; + return left; + } else { + // read only part of pending + System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, docs.length); + System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, docs.length); + pendingBulk += docs.length; + return docs.length; + } } else { - return docsEnum.read(docs, freqs); + // nothing pending + pendingBulkResult = docsEnum.read(); + if (docs.length >= pendingBulkResult.count) { + System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, pendingBulkResult.count); + System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, pendingBulkResult.count); + return pendingBulkResult.count; + } else { + System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, docs.length); + System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, docs.length); + pendingBulk = docs.length; + return docs.length; + } } } Index: src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 921577) +++ src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -113,8 +113,9 @@ else lastPerField.next = perField.next; - if (state.docWriter.infoStream != null) - state.docWriter.infoStream.println(" purge field=" + perField.fieldInfo.name); + if (state.infoStream != null) { + state.infoStream.println(" purge field=" + perField.fieldInfo.name); + } totalFieldCount--; Index: src/java/org/apache/lucene/index/StoredFieldsWriter.java =================================================================== --- src/java/org/apache/lucene/index/StoredFieldsWriter.java (revision 921577) +++ src/java/org/apache/lucene/index/StoredFieldsWriter.java (working copy) @@ -90,8 +90,8 @@ state.flushedFiles.add(fieldsName); state.flushedFiles.add(fieldsIdxName); - state.docWriter.removeOpenFile(fieldsName); - state.docWriter.removeOpenFile(fieldsIdxName); + docWriter.removeOpenFile(fieldsName); + docWriter.removeOpenFile(fieldsIdxName); if (4+((long) state.numDocsInStore)*8 != state.directory.fileLength(fieldsIdxName)) throw new RuntimeException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.fileLength(fieldsIdxName) + " length in bytes of " + fieldsIdxName + " file exists?=" + state.directory.fileExists(fieldsIdxName)); Index: src/java/org/apache/lucene/index/SegmentReadState.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReadState.java (revision 0) +++ src/java/org/apache/lucene/index/SegmentReadState.java (revision 0) @@ -0,0 +1,43 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.Directory; + +/** + * @lucene.experimental + */ +public class SegmentReadState { + public final Directory dir; + public final SegmentInfo segmentInfo; + public final FieldInfos fieldInfos; + public final int readBufferSize; + public final int termsIndexDivisor; + + public SegmentReadState(Directory dir, + SegmentInfo info, + FieldInfos fieldInfos, + int readBufferSize, + int termsIndexDivisor) { + this.dir = dir; + this.segmentInfo = info; + this.fieldInfos = fieldInfos; + this.readBufferSize = readBufferSize; + this.termsIndexDivisor = termsIndexDivisor; + } +} \ No newline at end of file Property changes on: src/java/org/apache/lucene/index/SegmentReadState.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 921577) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.store.ChecksumIndexOutput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.NoSuchDirectoryException; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.ThreadInterruptedException; import java.io.FileNotFoundException; @@ -234,7 +234,7 @@ * @throws IOException if there is a low-level IO error */ public final void read(Directory directory, String segmentFileName, - Codecs codecs) throws CorruptIndexException, IOException { + CodecProvider codecs) throws CorruptIndexException, IOException { boolean success = false; // Clear any previous segments: @@ -307,10 +307,10 @@ * @throws IOException if there is a low-level IO error */ public final void read(Directory directory) throws CorruptIndexException, IOException { - read(directory, Codecs.getDefault()); + read(directory, CodecProvider.getDefault()); } - public final void read(Directory directory, final Codecs codecs) throws CorruptIndexException, IOException { + public final void read(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException { generation = lastGeneration = -1; new FindSegmentsFile(directory) { @@ -410,7 +410,7 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public static long readCurrentVersion(Directory directory, final Codecs codecs) + public static long readCurrentVersion(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException { // Fully read the segments file: this ensures that it's @@ -428,7 +428,7 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public static Map readCurrentUserData(Directory directory, Codecs codecs) + public 
static Map readCurrentUserData(Directory directory, CodecProvider codecs) throws CorruptIndexException, IOException { SegmentInfos sis = new SegmentInfos(); sis.read(directory, codecs); Index: src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- src/java/org/apache/lucene/index/CheckIndex.java (revision 921577) +++ src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.document.AbstractField; // for javadocs import org.apache.lucene.document.Document; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -280,7 +280,7 @@ } protected Status checkIndex(List onlySegments) throws IOException { - return checkIndex(onlySegments, Codecs.getDefault()); + return checkIndex(onlySegments, CodecProvider.getDefault()); } /** Returns a {@link Status} instance detailing @@ -295,7 +295,7 @@ *

WARNING: make sure * you only call this when the index is not opened by any * writer. */ - protected Status checkIndex(List onlySegments, Codecs codecs) throws IOException { + protected Status checkIndex(List onlySegments, CodecProvider codecs) throws IOException { NumberFormat nf = NumberFormat.getInstance(); SegmentInfos sis = new SegmentInfos(); Status result = new Status(); Index: src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (revision 921577) +++ src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (working copy) @@ -18,22 +18,22 @@ */ import org.apache.lucene.store.Directory; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import java.io.IOException; import java.util.Map; class ReadOnlyDirectoryReader extends DirectoryReader { - ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor, Codecs codecs) throws IOException { + ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { super(directory, sis, deletionPolicy, true, termInfosIndexDivisor, codecs); } ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone, - int termInfosIndexDivisor, Codecs codecs) throws IOException { + int termInfosIndexDivisor, CodecProvider codecs) throws IOException { super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor, codecs); } - ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, Codecs codecs) throws IOException { + ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { super(writer, infos, termInfosIndexDivisor, codecs); } Index: src/java/org/apache/lucene/index/DocsEnum.java =================================================================== --- src/java/org/apache/lucene/index/DocsEnum.java (revision 921577) +++ src/java/org/apache/lucene/index/DocsEnum.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IntsRef; /** Iterates through the documents, term freq and positions. * NOTE: you must first call {@link #next}. @@ -44,16 +45,42 @@ return atts; } - // nocommit -- fix this API so that intblock codecs are - // able to return their own int arrays, to save a copy... IntsRef? - /** Bulk read: returns number of docs read. After this is - * called, {@link #doc} and {@link #freq} are undefined. + // TODO: maybe add bulk read only docIDs (for eventual + // match-only scoring) + + public static class BulkReadResult { + public final IntsRef docs = new IntsRef(); + public final IntsRef freqs = new IntsRef(); + public int count; + } + + protected BulkReadResult bulkResult; + + protected final void initBulkResult() { + if (bulkResult == null) { + bulkResult = new BulkReadResult(); + bulkResult.docs.ints = new int[64]; + bulkResult.freqs.ints = new int[64]; + } + } + + /** Bulk read (docs and freqs). After this is called, + * {@link #doc} and {@link #freq} are undefined. 
You must
+   * refer to the count member of BulkReadResult to determine
+   * how many docs were loaded (the IntsRef for docs and
+   * freqs will not have their length set).  This method
+   * will not return null.  The end has been reached when
+   * .count is 0.
   *
   *

NOTE: the default impl simply delegates to {@link * #nextDoc}, but subclasses may do this more * efficiently. */ - public int read(int[] docs, int[] freqs) throws IOException { + // nocommit -- maybe pre-share the BulkReadResult.... hmm + public BulkReadResult read() throws IOException { + initBulkResult(); int count = 0; + final int[] docs = bulkResult.docs.ints; + final int[] freqs = bulkResult.freqs.ints; while(count < docs.length) { final int doc = nextDoc(); if (doc != NO_MORE_DOCS) { @@ -64,6 +91,7 @@ break; } } - return count; + bulkResult.count = count; + return bulkResult; } } Index: src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/DirectoryReader.java (revision 921577) +++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -35,7 +35,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; @@ -49,7 +49,7 @@ protected Directory directory; protected boolean readOnly; - protected Codecs codecs; + protected CodecProvider codecs; IndexWriter writer; @@ -77,10 +77,10 @@ // } static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, - final int termInfosIndexDivisor, Codecs codecs) throws CorruptIndexException, IOException { - final Codecs codecs2; + final int termInfosIndexDivisor, CodecProvider codecs) throws CorruptIndexException, IOException { + final CodecProvider codecs2; if (codecs == null) { - codecs2 = Codecs.getDefault(); + codecs2 = CodecProvider.getDefault(); } else { codecs2 = codecs; } @@ -103,7 +103,7 @@ // } /** Construct reading the named set of readers. 
*/ - DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor, Codecs codecs) throws IOException { + DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = sis; @@ -111,7 +111,7 @@ this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { - this.codecs = Codecs.getDefault(); + this.codecs = CodecProvider.getDefault(); } else { this.codecs = codecs; } @@ -151,13 +151,13 @@ } // Used by near real-time search - DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, Codecs codecs) throws IOException { + DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { this.directory = writer.getDirectory(); this.readOnly = true; segmentInfos = infos; this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { - this.codecs = Codecs.getDefault(); + this.codecs = CodecProvider.getDefault(); } else { this.codecs = codecs; } @@ -213,13 +213,13 @@ /** This constructor is only used for {@link #reopen()} */ DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, - Map oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor, Codecs codecs) throws IOException { + Map oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = infos; this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { - this.codecs = Codecs.getDefault(); + this.codecs = CodecProvider.getDefault(); } else { this.codecs = codecs; } @@ -1039,11 +1039,11 @@ /** @see org.apache.lucene.index.IndexReader#listCommits */ public static Collection listCommits(Directory dir) throws IOException { - return listCommits(dir, Codecs.getDefault()); + return listCommits(dir, CodecProvider.getDefault()); } /** @see org.apache.lucene.index.IndexReader#listCommits */ - public static Collection listCommits(Directory dir, Codecs codecs) throws IOException { + public static Collection listCommits(Directory dir, CodecProvider codecs) throws IOException { final String[] files = dir.listAll(); Collection commits = new ArrayList(); Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 921577) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -27,7 +27,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.MergePolicy.MergeAbortedException; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.FieldsConsumer; @@ -75,11 +75,11 @@ when merging stored fields */ private final static int MAX_RAW_MERGE_DOCS = 4192; - private final Codecs codecs; + private final CodecProvider codecs; private Codec codec; private SegmentWriteState segmentWriteState; - SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, Codecs 
codecs) {
+  SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs) {
     directory = dir;
     this.codecs = codecs;
     segment = name;
@@ -555,7 +555,7 @@
   private final void mergeTerms() throws CorruptIndexException, IOException {
-    // Let Codecs decide which codec will be used to write
+    // Let CodecProvider decide which codec will be used to write
     // the new segment:
     codec = codecs.getWriter(segmentWriteState);
@@ -631,9 +631,13 @@
     final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
-    // nocommit: somewhat stupid that we create this only to
-    // have it broken apart when we step through the docs
-    // enums in MultidDcsEnum.... is there a cleaner way?
+    // NOTE: this is silly, yet necessary -- we create a
+    // MultiBits as our skip docs only to have it broken
+    // apart when we step through the docs enums in
+    // MultiDocsEnum.... this only matters when we are
+    // interacting with a non-core IR subclass, because
+    // LegacyFieldsEnum.LegacyDocs[AndPositions]Enum checks
+    // that the skipDocs matches the delDocs for the reader
     mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
     try {
Index: src/java/org/apache/lucene/index/TermsEnum.java
===================================================================
--- src/java/org/apache/lucene/index/TermsEnum.java (revision 921577)
+++ src/java/org/apache/lucene/index/TermsEnum.java (working copy)
@@ -58,7 +58,6 @@
   *  indicate whether exact term was found, a different
   *  term was found, or EOF was hit.  The target term may
   *  be befor or after the current term. */
-  // nocommit -- add boolean doCache?
  public abstract SeekStatus seek(BytesRef text) throws IOException;
  /** Seeks to the specified term by ordinal (position) as
@@ -81,11 +80,9 @@
  /** Returns ordinal position for current term.  This is an
   *  optional method (the codec may throw {@link
   *  UnsupportedOperationException}).  Do not call this
-   *  before calling next() for the first time, after next()
-   *  returns null or seek returns {@link
-   *  SeekStatus#END}. */
-  // nocommit -- should we allow calling this after next
-  // returns null?  and it returns 1+ max ord?
+   *  before calling {@link #next} for the first time or after
+   *  {@link #next} returns null or {@link #seek} returns
+   *  END. */
  public abstract long ord() throws IOException;
  /** Returns the number of documents containing the current
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 921577)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -20,7 +20,7 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.index.codecs.Codecs;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -345,7 +345,7 @@
  /** Expert: returns an IndexReader reading the index in
   *  the given Directory, with a custom {@link
-   *  IndexDeletionPolicy}, and specified {@link Codecs}.
+   *  IndexDeletionPolicy}, and specified {@link CodecProvider}.
   *  You should pass readOnly=true, since it gives much
   *  better concurrent performance, unless you intend to do
   *  write operations (delete documents or change norms)
@@ -365,18 +365,18 @@
   *  memory usage, at the expense of higher latency when
   *  loading a TermInfo.
The default value is 1. Set this * to -1 to skip loading the terms index entirely. - * @param codecs Codecs to use when opening index + * @param codecs CodecProvider to use when opening index * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor, Codecs codecs) throws CorruptIndexException, IOException { + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor, CodecProvider codecs) throws CorruptIndexException, IOException { return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor, codecs); } /** Expert: returns an IndexReader reading the index in * the given Directory, using a specific commit and with * a custom {@link IndexDeletionPolicy} and specified - * {@link Codecs}. You should pass readOnly=true, since + * {@link CodecProvider}. You should pass readOnly=true, since * it gives much better concurrent performance, unless * you intend to do write operations (delete documents or * change norms) with the reader. @@ -398,18 +398,18 @@ * memory usage, at the expense of higher latency when * loading a TermInfo. The default value is 1. Set this * to -1 to skip loading the terms index entirely. - * @param codecs Codecs to use when opening index + * @param codecs CodecProvider to use when opening index * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor, Codecs codecs) throws CorruptIndexException, IOException { + public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor, CodecProvider codecs) throws CorruptIndexException, IOException { return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor, codecs); } private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor, - Codecs codecs) throws CorruptIndexException, IOException { + CodecProvider codecs) throws CorruptIndexException, IOException { if (codecs == null) { - codecs = Codecs.getDefault(); + codecs = CodecProvider.getDefault(); } return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, codecs); } @@ -553,7 +553,7 @@ * @throws IOException if there is a low-level IO error */ public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException { - return SegmentInfos.readCurrentVersion(directory, Codecs.getDefault()); + return SegmentInfos.readCurrentVersion(directory, CodecProvider.getDefault()); } /** @@ -571,7 +571,7 @@ * @see #getCommitUserData() */ public static Map getCommitUserData(Directory directory) throws CorruptIndexException, IOException { - return SegmentInfos.readCurrentUserData(directory, Codecs.getDefault()); + return SegmentInfos.readCurrentUserData(directory, CodecProvider.getDefault()); } /** Index: src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 921577) +++ 
src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -589,7 +589,7 @@ synchronized private void initFlushState(boolean onlyDocStore) { initSegmentName(onlyDocStore); - flushState = new SegmentWriteState(this, directory, segment, docFieldProcessor.fieldInfos, + flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos, docStoreSegment, numDocsInRAM, numDocsInStore, writer.getTermIndexInterval(), writer.codecs); } Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 921577) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.util.Constants; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.ThreadInterruptedException; import java.io.IOException; @@ -983,7 +983,7 @@ * false or if there is any other low-level * IO error */ - public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit, Codecs codecs) + public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit, CodecProvider codecs) throws CorruptIndexException, LockObtainFailedException, IOException { init(d, a, create, deletionPolicy, mfl.getLimit(), indexingChain, commit, codecs); } @@ -1025,7 +1025,7 @@ init(d, a, false, deletionPolicy, mfl.getLimit(), null, commit, null); } - Codecs codecs; + CodecProvider codecs; private void init(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit) @@ -1039,11 +1039,11 @@ private void init(Directory d, Analyzer a, final boolean create, IndexDeletionPolicy deletionPolicy, int maxFieldLength, - IndexingChain indexingChain, IndexCommit commit, Codecs codecsIn) + IndexingChain indexingChain, IndexCommit commit, CodecProvider codecsIn) throws CorruptIndexException, LockObtainFailedException, IOException { if (codecsIn == null) { - codecs = Codecs.getDefault(); + codecs = CodecProvider.getDefault(); } else { codecs = codecsIn; } Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 921577) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -30,7 +30,7 @@ import java.util.List; import java.util.Map; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.Directory; /* @@ -128,7 +128,7 @@ * @throws IOException if there is a low-level IO error */ public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, DocumentsWriter docWriter, - Codecs codecs) + CodecProvider codecs) throws CorruptIndexException, IOException { this.docWriter = docWriter; Index: src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- src/java/org/apache/lucene/index/SegmentWriteState.java (revision 921577) +++ 
src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -19,10 +19,11 @@ import java.util.HashSet; import java.util.Collection; +import java.io.PrintStream; import org.apache.lucene.store.Directory; import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; /** * This class is not meant for public usage; it's only @@ -31,45 +32,42 @@ * @lucene.experimental */ public class SegmentWriteState { - // nocommit -- not clean that this is here; sometimes we - // write a newly flushed segment; other times a merged - // segment (and this is null): - DocumentsWriter docWriter; - public Directory directory; - public String segmentName; - public FieldInfos fieldInfos; - String docStoreSegmentName; - public int numDocs; - int numDocsInStore; - public Collection flushedFiles; + public final PrintStream infoStream; + public final Directory directory; + public final String segmentName; + public final FieldInfos fieldInfos; + public final String docStoreSegmentName; + public final int numDocs; + public int numDocsInStore; + public final Collection flushedFiles; // Actual codec used - Codec codec; + final Codec codec; /** Expert: The fraction of terms in the "dictionary" which should be stored * in RAM. Smaller values use more memory, but make searching slightly * faster, while larger values use less memory and make searching slightly * slower. Searching is typically not dominated by dictionary lookup, so * tweaking this is rarely useful.*/ - public int termIndexInterval; + public final int termIndexInterval; /** Expert: The fraction of {@link TermDocs} entries stored in skip tables, * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in * smaller indexes, greater acceleration, but fewer accelerable cases, while * smaller values result in bigger indexes, less acceleration and more * accelerable cases. More detailed experiments would be useful here. */ - public int skipInterval = 16; + public final int skipInterval = 16; /** Expert: The maximum number of skip levels. Smaller values result in * slightly smaller indexes, but slower skipping in big posting lists. 
*/ - public int maxSkipLevels = 10; + public final int maxSkipLevels = 10; - public SegmentWriteState(DocumentsWriter docWriter, Directory directory, String segmentName, FieldInfos fieldInfos, + public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, String docStoreSegmentName, int numDocs, int numDocsInStore, int termIndexInterval, - Codecs codecs) { - this.docWriter = docWriter; + CodecProvider codecs) { + this.infoStream = infoStream; this.directory = directory; this.segmentName = segmentName; this.fieldInfos = fieldInfos; Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNameFilter.java (revision 921577) +++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (working copy) @@ -20,7 +20,7 @@ import java.io.File; import java.io.FilenameFilter; import java.util.HashSet; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; /** * Filename filter that accept filenames and extensions only @@ -33,7 +33,7 @@ private final HashSet extensions; - public IndexFileNameFilter(Codecs codecs) { + public IndexFileNameFilter(CodecProvider codecs) { extensions = new HashSet(); for (String ext : IndexFileNames.INDEX_EXTENSIONS) { extensions.add(ext); Index: src/java/org/apache/lucene/index/codecs/Codecs.java =================================================================== --- src/java/org/apache/lucene/index/codecs/Codecs.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/Codecs.java (working copy) @@ -1,94 +0,0 @@ -package org.apache.lucene.index.codecs; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; - -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.codecs.intblock.IntBlockCodec; -import org.apache.lucene.index.codecs.preflex.PreFlexCodec; -import org.apache.lucene.index.codecs.pulsing.PulsingCodec; -import org.apache.lucene.index.codecs.sep.SepCodec; -import org.apache.lucene.index.codecs.standard.StandardCodec; - -/** Holds a set of codecs, keyed by name. You subclass - * this, instantiate it, and register your codecs, then - * pass this instance to IndexReader/IndexWriter (via - * package private APIs) to use different codecs when - * reading & writing segments. 
- * @lucene.experimental */ - -public abstract class Codecs { - - private final HashMap codecs = new HashMap(); - - private final Set knownExtensions = new HashSet(); - - public void register(Codec codec) { - if (codec.name == null) { - throw new IllegalArgumentException("code.name is null"); - } - - if (!codecs.containsKey(codec.name)) { - codecs.put(codec.name, codec); - codec.getExtensions(knownExtensions); - } else if (codecs.get(codec.name) != codec) { - throw new IllegalArgumentException("codec '" + codec.name + "' is already registered as a different codec instance"); - } - } - - public Collection getAllExtensions() { - return knownExtensions; - } - - public Codec lookup(String name) { - final Codec codec = (Codec) codecs.get(name); - if (codec == null) - throw new IllegalArgumentException("required codec '" + name + "' not found"); - return codec; - } - - public abstract Codec getWriter(SegmentWriteState state); - - static private final Codecs defaultCodecs = new DefaultCodecs(); - - public static Codecs getDefault() { - return defaultCodecs; - } -} - -class DefaultCodecs extends Codecs { - DefaultCodecs() { - register(new StandardCodec()); - register(new IntBlockCodec()); - register(new PreFlexCodec()); - register(new PulsingCodec()); - register(new SepCodec()); - } - - @Override - public Codec getWriter(SegmentWriteState state) { - return lookup("Standard"); - //return lookup("Pulsing"); - //return lookup("Sep"); - //return lookup("IntBlock"); - } -} \ No newline at end of file Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl; @@ -95,11 +96,11 @@ } @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException { + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { // We wrap StandardPostingsReaderImpl, but any StandardPostingsReader // will work: - StandardPostingsReader docsReader = new StandardPostingsReaderImpl(dir, si, readBufferSize); + StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize); StandardPostingsReader pulsingReader = new PulsingPostingsReaderImpl(docsReader); // Terms dict index reader @@ -107,10 +108,10 @@ boolean success = false; try { - indexReader = new SimpleStandardTermsIndexReader(dir, - fieldInfos, - si.name, - indexDivisor, + indexReader = new SimpleStandardTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, BytesRef.getUTF8SortedAsUTF16Comparator()); success = true; } finally { @@ -123,9 +124,9 @@ success = false; try { FieldsProducer ret = new StandardTermsDictReader(indexReader, - dir, fieldInfos, si.name, + state.dir, state.fieldInfos, state.segmentInfo.name, pulsingReader, - readBufferSize, + state.readBufferSize, BytesRef.getUTF8SortedAsUTF16Comparator(), 
StandardCodec.TERMS_CACHE_SIZE); success = true; Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (working copy) @@ -269,9 +269,12 @@ } @Override - public int read(int[] docs, int[] freqs) { + public BulkReadResult read() { int i=0; // TODO: -- ob1? + initBulkResult(); + final int[] docs = bulkResult.docs.ints; + final int[] freqs = bulkResult.freqs.ints; while(nextRead < state.docFreq) { doc = state.docs[nextRead++]; if (skipDocs == null || !skipDocs.get(doc.docID)) { @@ -280,7 +283,8 @@ i++; } } - return i; + bulkResult.count = i; + return bulkResult; } @Override Index: src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy) @@ -360,8 +360,11 @@ } @Override - public int read(int[] docs, int[] freqs) throws IOException { + public BulkReadResult read() throws IOException { // TODO: -- switch to bulk read api in IntIndexInput + initBulkResult(); + final int[] docs = bulkResult.docs.ints; + final int[] freqs = bulkResult.freqs.ints; int i = 0; final int length = docs.length; while (i < length && count < docFreq) { @@ -378,8 +381,8 @@ i++; } } - - return i; + bulkResult.count = i; + return bulkResult; } @Override Index: src/java/org/apache/lucene/index/codecs/sep/SepCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepCodec.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/sep/SepCodec.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -84,17 +85,17 @@ final static String PAYLOAD_EXTENSION = "pyl"; @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException { + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - StandardPostingsReader postingsReader = new SepPostingsReaderImpl(dir, si, readBufferSize, new SingleIntFactory()); + StandardPostingsReader postingsReader = new SepPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize, new SingleIntFactory()); StandardTermsIndexReader indexReader; boolean success = false; try { - indexReader = new SimpleStandardTermsIndexReader(dir, - fieldInfos, - si.name, - indexDivisor, + indexReader = new SimpleStandardTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, BytesRef.getUTF8SortedAsUTF16Comparator()); success = true; } finally { @@ -106,9 +107,11 @@ success = false; try { FieldsProducer ret = new StandardTermsDictReader(indexReader, - dir, fieldInfos, si.name, + state.dir, + state.fieldInfos, + state.segmentInfo.name, postingsReader, - readBufferSize, + state.readBufferSize, 
BytesRef.getUTF8SortedAsUTF16Comparator(), StandardCodec.TERMS_CACHE_SIZE); success = true; Index: src/java/org/apache/lucene/index/codecs/Codec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/Codec.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/Codec.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -56,8 +57,7 @@ /** Reads a segment. NOTE: by the time this call * returns, it must hold open any files it will need to * use; else, those files may be deleted. */ - // nocommit -- make a SegmentReadState (symmetric)? - public abstract FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException; + public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException; /** Gathers files associated with this segment */ public abstract void files(Directory dir, SegmentInfo segmentInfo, Set files) throws IOException; Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (working copy) @@ -280,10 +280,13 @@ } @Override - public int read(int[] docs, int[] freqs) throws IOException { + public BulkReadResult read() throws IOException { if (Codec.DEBUG) { Codec.debug("sdr.bulk read: ord=" + ord + " df=" + limit + " omitTF=" + omitTF + " ord=" + ord + " of " + limit + " freq.fp=" + freqIn.getFilePointer(), desc); } + initBulkResult(); + final int[] docs = bulkResult.docs.ints; + final int[] freqs = bulkResult.freqs.ints; int i = 0; final int length = docs.length; while (i < length && ord < limit) { @@ -310,11 +313,13 @@ ++i; } } + if (Codec.DEBUG) { System.out.println(" return " + i); } - return i; + bulkResult.count = i; + return bulkResult; } @Override Index: src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.FieldsConsumer; @@ -73,16 +74,16 @@ public final static int TERMS_CACHE_SIZE = 1024; @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException { - StandardPostingsReader postings = new StandardPostingsReaderImpl(dir, si, readBufferSize); + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + StandardPostingsReader postings = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize); 
StandardTermsIndexReader indexReader; boolean success = false; try { - indexReader = new SimpleStandardTermsIndexReader(dir, - fieldInfos, - si.name, - indexDivisor, + indexReader = new SimpleStandardTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, BytesRef.getUTF8SortedAsUTF16Comparator()); success = true; } finally { @@ -94,9 +95,11 @@ success = false; try { FieldsProducer ret = new StandardTermsDictReader(indexReader, - dir, fieldInfos, si.name, + state.dir, + state.fieldInfos, + state.segmentInfo.name, postings, - readBufferSize, + state.readBufferSize, BytesRef.getUTF8SortedAsUTF16Comparator(), TERMS_CACHE_SIZE); success = true; Index: src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (working copy) @@ -129,7 +129,7 @@ return clone; } - final void seek(long pointer, int p, Term t, TermInfo ti) + final void seek(long pointer, long p, Term t, TermInfo ti) throws IOException { input.seek(pointer); position = p; Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -61,8 +62,8 @@ } @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor) throws IOException { - return new PreFlexFields(dir, fieldInfos, info, readBufferSize, indexDivisor); + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + return new PreFlexFields(state.dir, state.fieldInfos, state.segmentInfo, state.readBufferSize, state.termsIndexDivisor); } @Override Index: src/java/org/apache/lucene/index/codecs/preflex/TermInfo.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/TermInfo.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/preflex/TermInfo.java (working copy) @@ -23,7 +23,7 @@ * indexing. */ @Deprecated -final class TermInfo { +class TermInfo { /** The number of documents which contain the term. 
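All of the fieldsProducer implementations above share the same shape: open the postings reader first, then open the terms-index and terms-dictionary readers inside try/finally blocks guarded by a success flag, so that a failure while opening a later component closes the components already opened (keeping the Codec contract that any files needed are held open by the time the call returns). A generic restatement of that idiom follows as a sketch; CloseOnFailure, alreadyOpen and openNext are placeholder names, not APIs from this patch.

import java.io.Closeable;
import java.util.concurrent.Callable;

final class CloseOnFailure {
  // 'alreadyOpen' plays the role of the postings reader, 'openNext' the
  // terms-index (or terms-dict) reader that is opened afterwards.
  static Closeable openSecond(Closeable alreadyOpen, Callable<Closeable> openNext) throws Exception {
    boolean success = false;
    try {
      Closeable next = openNext.call();
      success = true;
      return next;
    } finally {
      if (!success) {
        alreadyOpen.close();  // an exception in openNext must not leak files already held open
      }
    }
  }
}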
*/ int docFreq = 0; Index: src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (working copy) @@ -25,8 +25,8 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.store.Directory; import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.cache.DoubleBarrelLRUCache; import org.apache.lucene.util.cache.Cache; -import org.apache.lucene.util.cache.SimpleLRUCache; /** This stores a monotonically increasing set of pairs in a * Directory. Pairs are accessed either by Term or by ordinal position the @@ -53,14 +53,22 @@ private final static int DEFAULT_CACHE_SIZE = 1024; + // Just adds term's ord to TermInfo + private final static class TermInfoAndOrd extends TermInfo { + final int termOrd; + public TermInfoAndOrd(TermInfo ti, int termOrd) { + super(ti); + this.termOrd = termOrd; + } + } + + private final Cache termsCache = new DoubleBarrelLRUCache(DEFAULT_CACHE_SIZE); + /** * Per-thread resources managed by ThreadLocal */ private static final class ThreadResources { SegmentTermEnum termEnum; - - // Used for caching the least recently looked-up Terms - Cache termInfoCache; } TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor) @@ -138,6 +146,7 @@ if (origEnum != null) origEnum.close(); threadResources.close(); + termsCache.close(); } /** Returns the number of term/value pairs in the set. */ @@ -146,12 +155,10 @@ } private ThreadResources getThreadResources() { - ThreadResources resources = (ThreadResources)threadResources.get(); + ThreadResources resources = threadResources.get(); if (resources == null) { resources = new ThreadResources(); resources.termEnum = terms(); - // Cache does not have to be thread-safe, it is only used by one thread at the same time - resources.termInfoCache = new SimpleLRUCache(DEFAULT_CACHE_SIZE); threadResources.set(resources); } return resources; @@ -178,36 +185,38 @@ private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException { enumerator.seek(indexPointers[indexOffset], - (indexOffset * totalIndexInterval) - 1, - indexTerms[indexOffset], indexInfos[indexOffset]); + ((long) indexOffset * totalIndexInterval) - 1, + indexTerms[indexOffset], indexInfos[indexOffset]); } /** Returns the TermInfo for a Term in the set, or null. */ TermInfo get(Term term) throws IOException { - return get(term, true); + return get(term, false); } /** Returns the TermInfo for a Term in the set, or null. 
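Besides widening SegmentTermEnum.seek's position argument to long, the seekEnum hunk above casts before multiplying: ((long) indexOffset * totalIndexInterval) - 1. Without the cast the product is computed in 32-bit arithmetic and wraps for large term dictionaries before it is ever widened. A small worked example with illustrative values:

public class OverflowDemo {
  public static void main(String[] args) {
    int indexOffset = 20000000;          // e.g. 20M terms-index entries
    int totalIndexInterval = 128;
    long wrong = (indexOffset * totalIndexInterval) - 1;        // int multiply wraps: -1734967297
    long right = ((long) indexOffset * totalIndexInterval) - 1; // 2559999999
    System.out.println(wrong + " vs " + right);
  }
}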
*/ - private TermInfo get(Term term, boolean useCache) throws IOException { + private TermInfo get(Term term, boolean mustSeekEnum) throws IOException { if (size == 0) return null; ensureIndexIsRead(); - TermInfo ti; + TermInfoAndOrd tiOrd = termsCache.get(term); ThreadResources resources = getThreadResources(); - Cache cache = null; - - if (useCache) { - cache = resources.termInfoCache; - // check the cache first if the term was recently looked up - ti = (TermInfo) cache.get(term); - if (ti != null) { - return ti; - } + + if (!mustSeekEnum && tiOrd != null) { + return tiOrd; } + return seekEnum(resources.termEnum, term, tiOrd); + } + + TermInfo seekEnum(SegmentTermEnum enumerator, Term term) throws IOException { + return seekEnum(enumerator, term, termsCache.get(term)); + } + + TermInfo seekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd) throws IOException { + // optimize sequential access: first try scanning cached enum w/o seeking - SegmentTermEnum enumerator = resources.termEnum; if (enumerator.term() != null // term is at or past current && ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0) || term.compareTo(enumerator.term()) >= 0)) { @@ -216,16 +225,23 @@ || term.compareTo(indexTerms[enumOffset]) < 0) { // no need to seek + final TermInfo ti; + int numScans = enumerator.scanTo(term); if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) { ti = enumerator.termInfo(); - if (cache != null && numScans > 1) { + if (numScans > 1) { // we only want to put this TermInfo into the cache if // scanEnum skipped more than one dictionary entry. // This prevents RangeQueries or WildcardQueries to // wipe out the cache when they iterate over a large numbers // of terms in order - cache.put(term, ti); + if (tiOrd == null) { + termsCache.put(term, new TermInfoAndOrd(ti, (int) enumerator.position)); + } else { + assert sameTermInfo(ti, tiOrd, enumerator); + assert (int) enumerator.position == tiOrd.termOrd; + } } } else { ti = null; @@ -236,12 +252,24 @@ } // random-access: must seek - seekEnum(enumerator, getIndexOffset(term)); + final int indexPos; + if (tiOrd != null) { + indexPos = tiOrd.termOrd / totalIndexInterval; + } else { + // Must do binary search: + indexPos = getIndexOffset(term); + } + + seekEnum(enumerator, indexPos); enumerator.scanTo(term); + final TermInfo ti; if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) { ti = enumerator.termInfo(); - if (cache != null) { - cache.put(term, ti); + if (tiOrd == null) { + termsCache.put(term, new TermInfoAndOrd(ti, (int) enumerator.position)); + } else { + assert sameTermInfo(ti, tiOrd, enumerator); + assert (int) enumerator.position == tiOrd.termOrd; } } else { ti = null; @@ -249,28 +277,25 @@ return ti; } - /** Returns the nth term in the set. 
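The random-access branch above is the payoff of caching the term ordinal alongside its TermInfo (TermInfoAndOrd): on a repeat lookup the terms-index block can be computed directly from the ord instead of binary-searching the in-memory index terms. Since the index keeps every totalIndexInterval'th term, termOrd / totalIndexInterval is exactly the entry the binary search would have found. Restated as a sketch, with the surrounding reader state elided and getIndexOffset standing in for that binary search:

// Cache hit: O(1) jump to the index block. Cache miss: binary search as before.
final int indexPos;
if (tiOrd != null) {
  indexPos = tiOrd.termOrd / totalIndexInterval;
} else {
  indexPos = getIndexOffset(term);
}
seekEnum(enumerator, indexPos);
enumerator.scanTo(term);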
*/ - final Term get(int position) throws IOException { - if (size == 0) return null; - - SegmentTermEnum enumerator = getThreadResources().termEnum; - if (enumerator.term() != null && - position >= enumerator.position && - position < (enumerator.position + totalIndexInterval)) - return scanEnum(enumerator, position); // can avoid seek - - seekEnum(enumerator, position/totalIndexInterval); // must seek - return scanEnum(enumerator, position); + // called only from asserts + private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) { + if (ti1.docFreq != ti2.docFreq) { + return false; + } + if (ti1.freqPointer != ti2.freqPointer) { + return false; + } + if (ti1.proxPointer != ti2.proxPointer) { + return false; + } + // skipOffset is only valid when docFreq >= skipInterval: + if (ti1.docFreq >= enumerator.skipInterval && + ti1.skipOffset != ti2.skipOffset) { + return false; + } + return true; } - private final Term scanEnum(SegmentTermEnum enumerator, int position) throws IOException { - while(enumerator.position < position) - if (!enumerator.next()) - return null; - - return enumerator.term(); - } - private void ensureIndexIsRead() { if (indexTerms == null) { throw new IllegalStateException("terms index was not loaded when this reader was created"); @@ -297,14 +322,12 @@ /** Returns an enumeration of all the Terms and TermInfos in the set. */ public SegmentTermEnum terms() { - return (SegmentTermEnum) origEnum.clone(); + return (SegmentTermEnum)origEnum.clone(); } /** Returns an enumeration of terms starting at or after the named term. */ public SegmentTermEnum terms(Term term) throws IOException { - // don't use the cache in this call because we want to reposition the - // enumeration - get(term, false); + get(term, true); return (SegmentTermEnum)getThreadResources().termEnum.clone(); } } Index: src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -57,7 +57,6 @@ private final int readBufferSize; private Directory cfsReader; - // nocommit -- we need the legacy terms cache back in here PreFlexFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, int readBufferSize, int indexDivisor) throws IOException { @@ -147,8 +146,6 @@ // terms reader with index, the segment has switched // to CFS - // nocommit -- not clean that I open my own CFS - // reader here; caller should pass it in? if (!(dir instanceof CompoundFileReader)) { dir0 = cfsReader = new CompoundFileReader(dir, IndexFileNames.segmentFileName(si.name, IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize); } else { @@ -247,10 +244,13 @@ final Term t = termEnum.term(); if (t != null && t.field() == fieldInfo.name) { // No need to seek -- we have already advanced onto - // this field + // this field. We must be @ first term because + // flex API will not advance this enum further, on + // seeing a different field. 
} else { assert t == null || !t.field().equals(fieldInfo.name); // make sure field name is interned - termEnum = getTermsDict().terms(new Term(fieldInfo.name, "")); + final TermInfosReader tis = getTermsDict(); + tis.seekEnum(termEnum, new Term(fieldInfo.name, "")); } skipNext = true; } @@ -278,7 +278,13 @@ System.out.println("pff.seek term=" + term); } skipNext = false; - termEnum = getTermsDict().terms(new Term(fieldInfo.name, term.utf8ToString())); + final TermInfosReader tis = getTermsDict(); + final Term t0 = new Term(fieldInfo.name, term.utf8ToString()); + if (termEnum == null) { + termEnum = tis.terms(t0); + } else { + tis.seekEnum(termEnum, t0); + } final Term t = termEnum.term(); final BytesRef tr; @@ -411,8 +417,14 @@ } @Override - public int read(int[] docs, int[] freqs) throws IOException { - return this.docs.read(docs, freqs); + public BulkReadResult read() throws IOException { + if (bulkResult == null) { + initBulkResult(); + bulkResult.docs.ints = new int[32]; + bulkResult.freqs.ints = new int[32]; + } + bulkResult.count = this.docs.read(bulkResult.docs.ints, bulkResult.freqs.ints); + return bulkResult; } } Index: src/java/org/apache/lucene/index/codecs/intblock/IntBlockCodec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/intblock/IntBlockCodec.java (revision 921577) +++ src/java/org/apache/lucene/index/codecs/intblock/IntBlockCodec.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; @@ -82,16 +83,19 @@ } @Override - public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException { - StandardPostingsReader postingsReader = new SepPostingsReaderImpl(dir, si, readBufferSize, new SimpleIntBlockFactory(1024)); + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + StandardPostingsReader postingsReader = new SepPostingsReaderImpl(state.dir, + state.segmentInfo, + state.readBufferSize, + new SimpleIntBlockFactory(1024)); StandardTermsIndexReader indexReader; boolean success = false; try { - indexReader = new SimpleStandardTermsIndexReader(dir, - fieldInfos, - si.name, - indexDivisor, + indexReader = new SimpleStandardTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, BytesRef.getUTF8SortedAsUTF16Comparator()); success = true; } finally { @@ -103,9 +107,11 @@ success = false; try { FieldsProducer ret = new StandardTermsDictReader(indexReader, - dir, fieldInfos, si.name, + state.dir, + state.fieldInfos, + state.segmentInfo.name, postingsReader, - readBufferSize, + state.readBufferSize, BytesRef.getUTF8SortedAsUTF16Comparator(), StandardCodec.TERMS_CACHE_SIZE); success = true; Index: src/java/org/apache/lucene/index/codecs/CodecProvider.java =================================================================== --- src/java/org/apache/lucene/index/codecs/CodecProvider.java (revision 920448) +++ src/java/org/apache/lucene/index/codecs/CodecProvider.java (working copy) @@ -34,9 +34,10 @@ * pass this instance to IndexReader/IndexWriter (via * package private APIs) to use different codecs when * reading & writing segments. 
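With Codecs renamed to CodecProvider (hunk starting above), an application selects codecs by subclassing the provider: register every codec its segments may use so lookup(name) succeeds at read time, and pick the codec for newly written segments in getWriter. A sketch along the lines of the default provider in this patch; choosing "Pulsing" for writing is only an example.

import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
import org.apache.lucene.index.codecs.standard.StandardCodec;

public class MyCodecProvider extends CodecProvider {
  public MyCodecProvider() {
    register(new StandardCodec());   // still needed to read segments written earlier
    register(new PulsingCodec());
  }

  @Override
  public Codec getWriter(SegmentWriteState state) {
    return lookup("Pulsing");        // codec used for newly flushed segments
  }
}

Wiring a provider into IndexReader/IndexWriter is still package-private at this point (per the class javadoc), which is why the tests in this patch pass CodecProvider.getDefault() explicitly where needed.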
+ * * @lucene.experimental */ -public abstract class Codecs { +public abstract class CodecProvider { private final HashMap codecs = new HashMap(); @@ -68,15 +69,15 @@ public abstract Codec getWriter(SegmentWriteState state); - static private final Codecs defaultCodecs = new DefaultCodecs(); + static private final CodecProvider defaultCodecs = new DefaultCodecProvider(); - public static Codecs getDefault() { + public static CodecProvider getDefault() { return defaultCodecs; } } -class DefaultCodecs extends Codecs { - DefaultCodecs() { +class DefaultCodecProvider extends CodecProvider { + DefaultCodecProvider() { register(new StandardCodec()); register(new IntBlockCodec()); register(new PreFlexCodec()); Index: src/java/org/apache/lucene/util/BytesRef.java =================================================================== --- src/java/org/apache/lucene/util/BytesRef.java (revision 921577) +++ src/java/org/apache/lucene/util/BytesRef.java (working copy) @@ -23,7 +23,7 @@ /** Represents byte[], as a slice (offset + length) into an * existing byte[]. * - * @lucene.internal */ + * @lucene.experimental */ public final class BytesRef { public byte[] bytes; Index: backwards/flex_1458_3_0_back_compat_tests/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- backwards/flex_1458_3_0_back_compat_tests/src/test/org/apache/lucene/index/TestDoc.java (revision 916665) +++ backwards/flex_1458_3_0_back_compat_tests/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -35,7 +35,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; /** JUnit adaptation of an older test case DocTest. */ @@ -181,7 +181,7 @@ SegmentReader r1 = SegmentReader.get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); SegmentReader r2 = SegmentReader.get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); - SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL, merged, null, Codecs.getDefault()); + SegmentMerger merger = new SegmentMerger(si1.dir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL, merged, null, CodecProvider.getDefault()); merger.add(r1); merger.add(r2); Index: backwards/flex_1458_3_0_back_compat_tests/src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- backwards/flex_1458_3_0_back_compat_tests/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 916665) +++ backwards/flex_1458_3_0_back_compat_tests/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.document.Document; -import org.apache.lucene.index.codecs.Codecs; +import org.apache.lucene.index.codecs.CodecProvider; import java.io.IOException; import java.util.Collection; @@ -65,7 +65,7 @@ } public void testMerge() throws IOException { - SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, Codecs.getDefault()); + SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault()); merger.add(reader1); merger.add(reader2); int docsMerged = merger.merge();
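The back-compat test hunks above show the rename's only effect on callers: anywhere Codecs.getDefault() was passed, CodecProvider.getDefault() is passed instead. For example, when driving a merge directly (dir, mergedName, r1 and r2 are assumed to exist in the caller):

SegmentMerger merger = new SegmentMerger(dir, IndexWriter.DEFAULT_TERM_INDEX_INTERVAL,
                                         mergedName, null, CodecProvider.getDefault());
merger.add(r1);
merger.add(r2);
int docsMerged = merger.merge();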