Index: src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexReader.java (revision 793040) +++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -1800,4 +1800,40 @@ writer.close(); dir.close(); } + + // LUCENE-1609: don't load terms index + public void testNoTermsIndex() throws Throwable { + Directory dir = new MockRAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); + Document doc = new Document(); + doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.addDocument(doc); + writer.close(); + + IndexReader r = IndexReader.open(dir, null, true, -1); + try { + r.docFreq(new Term("field", "f")); + fail("did not hit expected exception"); + } catch (IllegalStateException ise) { + // expected + } + assertFalse(((SegmentReader) r.getSequentialSubReaders()[0]).termsIndexLoaded()); + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); + writer.addDocument(doc); + writer.close(); + + // LUCENE-1718: ensure re-open carries over no terms index: + IndexReader r2 = r.reopen(); + r.close(); + IndexReader[] subReaders = r2.getSequentialSubReaders(); + assertEquals(2, subReaders.length); + for(int i=0;i<2;i++) { + assertFalse(((SegmentReader) subReaders[i]).termsIndexLoaded()); + } + r2.close(); + dir.close(); + } } Index: src/test/org/apache/lucene/index/TestSegmentTermDocs.java =================================================================== --- src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 793040) +++ src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy) @@ -53,8 +53,7 @@ public void 
testTermDocs(int indexDivisor) throws IOException { //After adding the document, we should be able to read it back in - SegmentReader reader = SegmentReader.get(info); - reader.setTermInfosIndexDivisor(indexDivisor); + SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -76,8 +75,7 @@ public void testBadSeek(int indexDivisor) throws IOException { { //After adding the document, we should be able to read it back in - SegmentReader reader = SegmentReader.get(info); - reader.setTermInfosIndexDivisor(indexDivisor); + SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -87,8 +85,7 @@ } { //After adding the document, we should be able to read it back in - SegmentReader reader = SegmentReader.get(info); - reader.setTermInfosIndexDivisor(indexDivisor); + SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -123,9 +120,7 @@ writer.optimize(); writer.close(); - IndexReader reader = IndexReader.open(dir); - reader.setTermInfosIndexDivisor(indexDivisor); - assertEquals(indexDivisor, reader.getTermInfosIndexDivisor()); + IndexReader reader = IndexReader.open(dir, null, true, indexDivisor); TermDocs tdocs = reader.termDocs(); @@ -239,21 +234,6 @@ testBadSeek(2); testSkipTo(2); } - - public void testIndexDivisorAfterLoad() throws IOException { - dir = new MockRAMDirectory(); - testDoc = new Document(); - DocHelper.setupDoc(testDoc); - SegmentInfo si = DocHelper.writeDoc(dir, testDoc); - SegmentReader reader = SegmentReader.get(si); - assertEquals(1, reader.docFreq(new Term("keyField", "Keyword"))); - try { - reader.setTermInfosIndexDivisor(2); - fail("did not hit 
IllegalStateException exception"); - } catch (IllegalStateException ise) { - // expected - } - } private void addDoc(IndexWriter writer, String value) throws IOException { Index: src/test/org/apache/lucene/index/TestSegmentReader.java =================================================================== --- src/test/org/apache/lucene/index/TestSegmentReader.java (revision 793040) +++ src/test/org/apache/lucene/index/TestSegmentReader.java (working copy) @@ -203,20 +203,4 @@ assertTrue(results != null); assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4); } - - public void testIndexDivisor() throws IOException { - dir = new MockRAMDirectory(); - testDoc = new Document(); - DocHelper.setupDoc(testDoc); - SegmentInfo si = DocHelper.writeDoc(dir, testDoc); - - reader = SegmentReader.get(si); - reader.setTermInfosIndexDivisor(3); - testDocument(); - testDelete(); - testGetFieldNameVariations(); - testNorms(); - testTerms(); - testTermVectors(); - } } Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 793040) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -81,10 +81,12 @@ final FieldInfos fieldInfos; final IndexInput freqStream; final IndexInput proxStream; + final TermInfosReader tisNoIndex; final Directory dir; final Directory cfsDir; final int readBufferSize; + final int termsIndexDivisor; TermInfosReader tis; FieldsReader fieldsReaderOrig; @@ -92,7 +94,7 @@ CompoundFileReader cfsReader; CompoundFileReader storeCFSReader; - CoreReaders(Directory dir, SegmentInfo si, int readBufferSize) throws IOException { + CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException { segment = si.name; this.readBufferSize = readBufferSize; this.dir = dir; @@ -109,7 +111,14 @@ fieldInfos = new FieldInfos(cfsDir, segment + "." 
+ IndexFileNames.FIELD_INFOS_EXTENSION); - tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize); + this.termsIndexDivisor = termsIndexDivisor; + TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor); + if (termsIndexDivisor == -1) { + tisNoIndex = reader; + } else { + tis = reader; + tisNoIndex = null; + } // make sure that all index files have been read or are kept open // so that if an index update removes them we'll still have them @@ -144,6 +153,38 @@ return cfsReader; } + synchronized TermInfosReader getTermsReader() { + if (tis != null) { + return tis; + } else { + return tisNoIndex; + } + } + + synchronized boolean termsIndexIsLoaded() { + return tis != null; + } + + synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException { + if (tis == null) { + Directory dir0; + if (si.getUseCompoundFile()) { + // In some cases, we were originally opened when CFS + // was not used, but then we are asked to open the + // terms reader with index, the segment has switched + // to CFS + if (cfsReader == null) { + cfsReader = new CompoundFileReader(dir, segment + "." 
+ IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); + } + dir0 = cfsReader; + } else { + dir0 = dir; + } + + tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor); + } + } + synchronized void decRef() throws IOException { if (ref.decRef() == 0) { @@ -155,6 +196,10 @@ tis = null; } + if (tisNoIndex != null) { + tisNoIndex.close(); + } + if (freqStream != null) { freqStream.close(); } @@ -534,15 +579,15 @@ * @deprecated */ public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException { - return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true); + return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } /** * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException { - return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true); + public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor); } /** @@ -550,8 +595,8 @@ * @throws IOException if there is a low-level IO error * @deprecated */ - static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException { - return get(false, si.dir, si, readBufferSize, doOpenStores); + static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor); } /** @@ -562,7 +607,8 @@ Directory dir, SegmentInfo si, int readBufferSize, - boolean doOpenStores) + boolean doOpenStores, + int termInfosIndexDivisor) throws CorruptIndexException, 
IOException { SegmentReader instance; try { @@ -580,7 +626,7 @@ boolean success = false; try { - instance.core = new CoreReaders(dir, si, readBufferSize); + instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor); if (doOpenStores) { instance.core.openDocStores(si); } @@ -876,12 +922,12 @@ public TermEnum terms() { ensureOpen(); - return core.tis.terms(); + return core.getTermsReader().terms(); } public TermEnum terms(Term t) throws IOException { ensureOpen(); - return core.tis.terms(t); + return core.getTermsReader().terms(t); } FieldInfos fieldInfos() { @@ -917,7 +963,7 @@ public int docFreq(Term t) throws IOException { ensureOpen(); - TermInfo ti = core.tis.get(t); + TermInfo ti = core.getTermsReader().get(t); if (ti != null) return ti.docFreq; else @@ -937,14 +983,6 @@ return si.docCount; } - public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException { - core.tis.setIndexDivisor(indexDivisor); - } - - public int getTermInfosIndexDivisor() { - return core.tis.getIndexDivisor(); - } - /** * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption) */ @@ -1099,6 +1137,19 @@ } } + boolean termsIndexLoaded() { + return core.termsIndexIsLoaded(); + } + + // NOTE: only called from IndexWriter when a near + // real-time reader is opened, or applyDeletes is run, + // sharing a segment that's still being merged. This + // method is not thread safe, and relies on the + // synchronization in IndexWriter + void loadTermsIndex(int termsIndexDivisor) throws IOException { + core.loadTermsIndex(si, termsIndexDivisor); + } + // for testing only boolean normsClosed() { if (singleNormStream != null) { @@ -1268,9 +1319,16 @@ } public long getUniqueTermCount() { - return core.tis.size(); + return core.getTermsReader().size(); } + /* + // nocommit + final TermInfosReader getTermInfosReader() { + return terms.getTermsReader(); + } + */ + /** * Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java =================================================================== --- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 793040) +++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy) @@ -49,12 +49,12 @@ synchronized (parent) { this.deletedDocs = parent.deletedDocs; } - this.skipInterval = parent.core.tis.getSkipInterval(); - this.maxSkipLevels = parent.core.tis.getMaxSkipLevels(); + this.skipInterval = parent.core.getTermsReader().getSkipInterval(); + this.maxSkipLevels = parent.core.getTermsReader().getMaxSkipLevels(); } public void seek(Term term) throws IOException { - TermInfo ti = parent.core.tis.get(term); + TermInfo ti = parent.core.getTermsReader().get(term); seek(ti, term); } @@ -69,7 +69,7 @@ ti = segmentTermEnum.termInfo(); } else { // punt case term = termEnum.term(); - ti = parent.core.tis.get(term); + ti = parent.core.getTermsReader().get(term); } seek(ti, term); Index: src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (revision 793040) +++ src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (working copy) @@ -23,16 +23,17 @@ import java.util.Map; class ReadOnlyDirectoryReader extends DirectoryReader { - ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy) throws IOException { - super(directory, sis, deletionPolicy, true); + ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) throws IOException { + super(directory, sis, deletionPolicy, true, termInfosIndexDivisor); } - ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone) throws IOException { - super(directory, infos, oldReaders, oldStarts, 
oldNormsCache, true, doClone); + ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone, + int termInfosIndexDivisor) throws IOException { + super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor); } - ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException { - super(writer, infos); + ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException { + super(writer, infos, termInfosIndexDivisor); } protected void acquireWriteLock() { Index: src/java/org/apache/lucene/index/DirectoryReader.java =================================================================== --- src/java/org/apache/lucene/index/DirectoryReader.java (revision 793040) +++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy) @@ -51,6 +51,7 @@ private Lock writeLock; private SegmentInfos segmentInfos; private boolean stale; + private final int termInfosIndexDivisor; private boolean rollbackHasChanges; private SegmentInfos rollbackSegmentInfos; @@ -62,25 +63,27 @@ private int numDocs = -1; private boolean hasDeletions = false; - static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException { + static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, + final int termInfosIndexDivisor) throws CorruptIndexException, IOException { return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) { protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException { SegmentInfos infos = new SegmentInfos(); infos.read(directory, segmentFileName); if (readOnly) - return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy); + return new 
ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor); else - return new DirectoryReader(directory, infos, deletionPolicy, false); + return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor); } }.run(commit); } /** Construct reading the named set of readers. */ - DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws IOException { + DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws IOException { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = sis; this.deletionPolicy = deletionPolicy; + this.termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously @@ -97,7 +100,7 @@ for (int i = sis.size()-1; i >= 0; i--) { boolean success = false; try { - readers[i] = SegmentReader.get(readOnly, sis.info(i)); + readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor); success = true; } finally { if (!success) { @@ -117,10 +120,11 @@ } // Used by near real-time search - DirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException { + DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException { this.directory = writer.getDirectory(); this.readOnly = true; this.segmentInfos = infos; + this.termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously // properly sync'd: @@ -140,7 +144,7 @@ try { final SegmentInfo info = infos.info(upto); if (info.dir == dir) { - readers[upto++] = writer.readerPool.getReadOnlyClone(info, true); + readers[upto++] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor); } success = true; } finally { @@ -171,10 +175,11 @@ /** This constructor is only used for {@link #reopen()} */ DirectoryReader(Directory directory, 
SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, - Map oldNormsCache, boolean readOnly, boolean doClone) throws IOException { + Map oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor) throws IOException { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = infos; + this.termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously // properly sync'd: @@ -218,7 +223,7 @@ assert !doClone; // this is a new reader; in case we hit an exception we can close it safely - newReader = SegmentReader.get(readOnly, infos.info(i)); + newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor); } else { newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly); } @@ -426,10 +431,10 @@ private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException { DirectoryReader reader; - if (openReadOnly) { - reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone); + if (openReadOnly) { + reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); } else { - reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone); + reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor); } reader.setDisableFakeNorms(getDisableFakeNorms()); return reader; @@ -827,18 +832,6 @@ return subReaders; } - public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException { - for (int i = 0; i < subReaders.length; i++) - subReaders[i].setTermInfosIndexDivisor(indexDivisor); - } - - public int getTermInfosIndexDivisor() throws IllegalStateException { - if (subReaders.length > 0) - return subReaders[0].getTermInfosIndexDivisor(); - else - throw new IllegalStateException("no 
readers"); - } - public void setDisableFakeNorms(boolean disableFakeNorms) { super.setDisableFakeNorms(disableFakeNorms); for (int i = 0; i < subReaders.length; i++) Index: src/java/org/apache/lucene/index/TermInfosReader.java =================================================================== --- src/java/org/apache/lucene/index/TermInfosReader.java (revision 793040) +++ src/java/org/apache/lucene/index/TermInfosReader.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.util.cache.Cache; import org.apache.lucene.util.cache.SimpleLRUCache; import org.apache.lucene.util.CloseableThreadLocal; @@ -30,22 +29,19 @@ * set. */ final class TermInfosReader { - private Directory directory; - private String segment; - private FieldInfos fieldInfos; + private final Directory directory; + private final String segment; + private final FieldInfos fieldInfos; - private CloseableThreadLocal threadResources = new CloseableThreadLocal(); - private SegmentTermEnum origEnum; - private long size; + private final CloseableThreadLocal threadResources = new CloseableThreadLocal(); + private final SegmentTermEnum origEnum; + private final long size; - private Term[] indexTerms = null; - private TermInfo[] indexInfos; - private long[] indexPointers; + private final Term[] indexTerms; + private final TermInfo[] indexInfos; + private final long[] indexPointers; - private SegmentTermEnum indexEnum; - - private int indexDivisor = 1; - private int totalIndexInterval; + private final int totalIndexInterval; private final static int DEFAULT_CACHE_SIZE = 1024; @@ -59,15 +55,14 @@ Cache termInfoCache; } - TermInfosReader(Directory dir, String seg, FieldInfos fis) + TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor) throws CorruptIndexException, IOException { - this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE); - } - - 
TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize) - throws CorruptIndexException, IOException { boolean success = false; + if (indexDivisor < 1 && indexDivisor != -1) { + throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); + } + try { directory = dir; segment = seg; @@ -76,11 +71,40 @@ origEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false); size = origEnum.size; - totalIndexInterval = origEnum.indexInterval; - indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, - readBufferSize), fieldInfos, true); + if (indexDivisor != -1) { + // Load terms index + totalIndexInterval = origEnum.indexInterval * indexDivisor; + final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, + readBufferSize), fieldInfos, true); + + try { + int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index + + indexTerms = new Term[indexSize]; + indexInfos = new TermInfo[indexSize]; + indexPointers = new long[indexSize]; + + for (int i = 0; indexEnum.next(); i++) { + indexTerms[i] = indexEnum.term(); + indexInfos[i] = indexEnum.termInfo(); + indexPointers[i] = indexEnum.indexPointer; + + for (int j = 1; j < indexDivisor; j++) + if (!indexEnum.next()) + break; + } + } finally { + indexEnum.close(); + } + } else { + // Do not load terms index: + totalIndexInterval = -1; + indexTerms = null; + indexInfos = null; + indexPointers = null; + } success = true; } finally { // With lock-less commits, it's entirely possible (and @@ -102,48 +126,9 @@ return origEnum.maxSkipLevels; } - /** - *

Sets the indexDivisor, which subsamples the number - * of indexed terms loaded into memory. This has a - * similar effect as {@link - * IndexWriter#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1.

- * - * NOTE: you must call this before the term - * index is loaded. If the index is already loaded, - * an IllegalStateException is thrown. - * - + @throws IllegalStateException if the term index has - * already been loaded into memory. - */ - public void setIndexDivisor(int indexDivisor) throws IllegalStateException { - if (indexDivisor < 1) - throw new IllegalArgumentException("indexDivisor must be > 0: got " + indexDivisor); - - if (indexTerms != null) - throw new IllegalStateException("index terms are already loaded"); - - this.indexDivisor = indexDivisor; - totalIndexInterval = origEnum.indexInterval * indexDivisor; - } - - /** Returns the indexDivisor. - * @see #setIndexDivisor - */ - public int getIndexDivisor() { - return indexDivisor; - } - final void close() throws IOException { if (origEnum != null) origEnum.close(); - if (indexEnum != null) - indexEnum.close(); threadResources.close(); } @@ -164,31 +149,7 @@ return resources; } - private synchronized void ensureIndexIsRead() throws IOException { - if (indexTerms != null) // index already read - return; // do nothing - try { - int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index - indexTerms = new Term[indexSize]; - indexInfos = new TermInfo[indexSize]; - indexPointers = new long[indexSize]; - - for (int i = 0; indexEnum.next(); i++) { - indexTerms[i] = indexEnum.term(); - indexInfos[i] = indexEnum.termInfo(); - indexPointers[i] = indexEnum.indexPointer; - - for (int j = 1; j < indexDivisor; j++) - if (!indexEnum.next()) - break; - } - } finally { - indexEnum.close(); - indexEnum = null; - } - } - /** Returns the offset of the greatest index entry which is less than or equal to term.*/ private final int getIndexOffset(Term term) { int lo = 0; // binary search indexTerms[] @@ -223,7 +184,7 @@ if (size == 0) return null; ensureIndexIsRead(); - + TermInfo ti; ThreadResources resources = getThreadResources(); Cache cache = null; @@ -302,6 +263,12 @@ return enumerator.term(); } + 
private void ensureIndexIsRead() { + if (indexTerms == null) { + throw new IllegalStateException("terms index was not loaded when this reader was created"); + } + } + /** Returns the position of a Term in the set or -1. */ final long getPosition(Term term) throws IOException { if (size == 0) return -1; Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 793040) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -115,6 +115,8 @@ private int refCount; + static int DEFAULT_TERMS_INDEX_DIVISOR = 1; + private boolean disableFakeNorms = false; /** Expert: returns the current refCount for this reader */ @@ -228,7 +230,7 @@ final Directory dir = FSDirectory.getDirectory(path); IndexReader r = null; try { - r = open(dir, null, null, readOnly); + r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } finally { if (r == null) dir.close(); @@ -265,7 +267,7 @@ final Directory dir = FSDirectory.getDirectory(path); IndexReader r = null; try { - r = open(dir, null, null, readOnly); + r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } finally { if (r == null) dir.close(); @@ -282,7 +284,7 @@ * Use {@link #open(Directory, boolean)} instead */ public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { - return open(directory, null, null, false); + return open(directory, null, null, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Returns an IndexReader reading the index in the given @@ -296,7 +298,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, null, null, readOnly); + return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns a read/write IndexReader reading the index 
in the given @@ -308,7 +310,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, false); + return open(commit.getDirectory(), null, commit, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in the given @@ -322,7 +324,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, readOnly); + return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns a read/write IndexReader reading the index in the given @@ -337,7 +339,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, false); + return open(directory, deletionPolicy, null, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in @@ -355,9 +357,37 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, readOnly); + return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } + /** Expert: returns an IndexReader reading the index in + * the given Directory, with a custom {@link + * IndexDeletionPolicy}. You should pass readOnly=true, + * since it gives much better concurrent performance, + * unless you intend to do write operations (delete + * documents or change norms) with the reader. 
+ * @param directory the index directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); + } + /** Expert: returns a read/write IndexReader reading the index in the given * Directory, using a specific commit and with a custom * {@link IndexDeletionPolicy}. 
@@ -373,7 +403,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, false); + return open(commit.getDirectory(), deletionPolicy, commit, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in @@ -393,13 +423,43 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, readOnly); + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } - private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException { - return DirectoryReader.open(directory, deletionPolicy, commit, readOnly); + /** Expert: returns an IndexReader reading the index in + * the given Directory, using a specific commit and with + * a custom {@link IndexDeletionPolicy}. You should pass + * readOnly=true, since it gives much better concurrent + * performance, unless you intend to do write operations + * (delete documents or change norms) with the reader. + * @param commit the specific {@link IndexCommit} to open; + * see {@link IndexReader#listCommits} to list all commits + * in a directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. 
This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); } + private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); + } + /** * Refreshes an IndexReader if the index has changed since this instance * was (re)opened. @@ -668,17 +728,20 @@ * index is loaded. If the index is already loaded, * an IllegalStateException is thrown. * @throws IllegalStateException if the term index has already been loaded into memory + * @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. */ public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException { - throw new UnsupportedOperationException("This reader does not support this method."); + throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader"); } /**

For IndexReader implementations that use * TermInfosReader to read terms, this returns the * current indexDivisor. - * @see #setTermInfosIndexDivisor */ + * @see #setTermInfosIndexDivisor + * @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. + */ public int getTermInfosIndexDivisor() { - throw new UnsupportedOperationException("This reader does not support this method."); + throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader"); } /** Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 793040) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -425,6 +425,23 @@ * @throws IOException */ public IndexReader getReader() throws IOException { + return getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** Expert: like {@link #getReader}, except you can + * specify which termInfosIndexDivisor should be used for + * any newly opened readers. + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. 
*/ + public IndexReader getReader(int termInfosIndexDivisor) throws IOException { if (infoStream != null) { message("flush at getReader"); } @@ -440,7 +457,7 @@ // reader; in theory we could do similar retry logic, // just like we do when loading segments_N synchronized(this) { - return new ReadOnlyDirectoryReader(this, segmentInfos); + return new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor); } } @@ -590,8 +607,8 @@ // Returns a ref to a clone. NOTE: this clone is not // enrolled in the pool, so you should simply close() // it when you're done (ie, do not call release()). - public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores) throws IOException { - SegmentReader sr = get(info, doOpenStores); + public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException { + SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor); try { return (SegmentReader) sr.clone(true); } finally { @@ -601,10 +618,10 @@ // Returns a ref public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException { - return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE); + return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } - public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize) throws IOException { + public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException { if (poolReaders) { readBufferSize = BufferedIndexInput.BUFFER_SIZE; @@ -615,10 +632,21 @@ // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: - sr = SegmentReader.get(info, readBufferSize, doOpenStores); + sr = SegmentReader.get(info, readBufferSize, doOpenStores, termsIndexDivisor); readerMap.put(info, 
sr); - } else if (doOpenStores) { - sr.openDocStores(); + } else { + if (doOpenStores) { + sr.openDocStores(); + } + if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) { + // If this reader was originally opened because we + // needed to merge it, we didn't load the terms + // index. But now, if the caller wants the terms + // index (eg because it's doing deletes, or an NRT + // reader is being opened) we ask the reader to + // load its terms index. + sr.loadTermsIndex(termsIndexDivisor); + } } // Return a ref to our caller @@ -3769,7 +3797,7 @@ SegmentReader sReader = null; synchronized(this) { if (segmentInfos.size() == 1) { // add existing index, if any - sReader = readerPool.get(segmentInfos.info(0), true); + sReader = readerPool.get(segmentInfos.info(0), true, BufferedIndexInput.BUFFER_SIZE, -1); } } @@ -4867,7 +4895,8 @@ // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.get(info, merge.mergeDocStores, - MERGE_READ_BUFFER_SIZE); + MERGE_READ_BUFFER_SIZE, + -1); // We clone the segment readers because other // deletes may come in while we're merging so we @@ -4923,7 +4952,7 @@ // keep deletes (it's costly to open entire reader // when we just need deletes) - final SegmentReader mergedReader = readerPool.get(merge.info, false); + final SegmentReader mergedReader = readerPool.get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, -1); try { if (poolReaders && mergedSegmentWarmer != null) { mergedSegmentWarmer.warm(mergedReader);