Index: src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexReader.java (revision 793040)
+++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -1800,4 +1800,40 @@
writer.close();
dir.close();
}
+
+ // LUCENE-1609: don't load terms index
+ public void testNoTermsIndex() throws Throwable {
+ Directory dir = new MockRAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ Document doc = new Document();
+ doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
+ doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader r = IndexReader.open(dir, null, true, -1);
+ try {
+ r.docFreq(new Term("field", "f"));
+ fail("did not hit expected exception");
+ } catch (IllegalStateException ise) {
+ // expected
+ }
+ assertFalse(((SegmentReader) r.getSequentialSubReaders()[0]).termsIndexLoaded());
+
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ writer.addDocument(doc);
+ writer.close();
+
+ // LUCENE-1718: ensure re-open carries over no terms index:
+ IndexReader r2 = r.reopen();
+ r.close();
+ IndexReader[] subReaders = r2.getSequentialSubReaders();
+ assertEquals(2, subReaders.length);
+ for(int i=0;i<2;i++) {
+ assertFalse(((SegmentReader) subReaders[i]).termsIndexLoaded());
+ }
+ r2.close();
+ dir.close();
+ }
}
Index: src/test/org/apache/lucene/index/TestSegmentTermDocs.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 793040)
+++ src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy)
@@ -53,8 +53,7 @@
public void testTermDocs(int indexDivisor) throws IOException {
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(info);
- reader.setTermInfosIndexDivisor(indexDivisor);
+ SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -76,8 +75,7 @@
public void testBadSeek(int indexDivisor) throws IOException {
{
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(info);
- reader.setTermInfosIndexDivisor(indexDivisor);
+ SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -87,8 +85,7 @@
}
{
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(info);
- reader.setTermInfosIndexDivisor(indexDivisor);
+ SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -123,9 +120,7 @@
writer.optimize();
writer.close();
- IndexReader reader = IndexReader.open(dir);
- reader.setTermInfosIndexDivisor(indexDivisor);
- assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());
+ IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);
TermDocs tdocs = reader.termDocs();
@@ -239,21 +234,6 @@
testBadSeek(2);
testSkipTo(2);
}
-
- public void testIndexDivisorAfterLoad() throws IOException {
- dir = new MockRAMDirectory();
- testDoc = new Document();
- DocHelper.setupDoc(testDoc);
- SegmentInfo si = DocHelper.writeDoc(dir, testDoc);
- SegmentReader reader = SegmentReader.get(si);
- assertEquals(1, reader.docFreq(new Term("keyField", "Keyword")));
- try {
- reader.setTermInfosIndexDivisor(2);
- fail("did not hit IllegalStateException exception");
- } catch (IllegalStateException ise) {
- // expected
- }
- }
private void addDoc(IndexWriter writer, String value) throws IOException
{
Index: src/test/org/apache/lucene/index/TestSegmentReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentReader.java (revision 793040)
+++ src/test/org/apache/lucene/index/TestSegmentReader.java (working copy)
@@ -203,20 +203,4 @@
assertTrue(results != null);
assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
}
-
- public void testIndexDivisor() throws IOException {
- dir = new MockRAMDirectory();
- testDoc = new Document();
- DocHelper.setupDoc(testDoc);
- SegmentInfo si = DocHelper.writeDoc(dir, testDoc);
-
- reader = SegmentReader.get(si);
- reader.setTermInfosIndexDivisor(3);
- testDocument();
- testDelete();
- testGetFieldNameVariations();
- testNorms();
- testTerms();
- testTermVectors();
- }
}
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 793040)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -81,10 +81,12 @@
final FieldInfos fieldInfos;
final IndexInput freqStream;
final IndexInput proxStream;
+ final TermInfosReader tisNoIndex;
final Directory dir;
final Directory cfsDir;
final int readBufferSize;
+ final int termsIndexDivisor;
TermInfosReader tis;
FieldsReader fieldsReaderOrig;
@@ -92,7 +94,7 @@
CompoundFileReader cfsReader;
CompoundFileReader storeCFSReader;
- CoreReaders(Directory dir, SegmentInfo si, int readBufferSize) throws IOException {
+ CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
segment = si.name;
this.readBufferSize = readBufferSize;
this.dir = dir;
@@ -109,7 +111,14 @@
fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
- tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
+ this.termsIndexDivisor = termsIndexDivisor;
+ TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
+ if (termsIndexDivisor == -1) {
+ tisNoIndex = reader;
+ } else {
+ tis = reader;
+ tisNoIndex = null;
+ }
// make sure that all index files have been read or are kept open
// so that if an index update removes them we'll still have them
@@ -144,6 +153,38 @@
return cfsReader;
}
+ synchronized TermInfosReader getTermsReader() {
+ if (tis != null) {
+ return tis;
+ } else {
+ return tisNoIndex;
+ }
+ }
+
+ synchronized boolean termsIndexIsLoaded() {
+ return tis != null;
+ }
+
+ synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
+ if (tis == null) {
+ Directory dir0;
+ if (si.getUseCompoundFile()) {
+ // In some cases the segment was originally opened
+ // without CFS, but by the time we are asked to load
+ // the terms index it has switched over to the
+ // compound file format
+ if (cfsReader == null) {
+ cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
+ }
+ dir0 = cfsReader;
+ } else {
+ dir0 = dir;
+ }
+
+ tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
+ }
+ }
+
synchronized void decRef() throws IOException {
if (ref.decRef() == 0) {
@@ -155,6 +196,10 @@
tis = null;
}
+ if (tisNoIndex != null) {
+ tisNoIndex.close();
+ }
+
if (freqStream != null) {
freqStream.close();
}
@@ -534,15 +579,15 @@
* @deprecated
*/
public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
- return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true);
+ return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
- public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException {
- return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true);
+ public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
+ return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
}
/**
@@ -550,8 +595,8 @@
* @throws IOException if there is a low-level IO error
* @deprecated
*/
- static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
- return get(false, si.dir, si, readBufferSize, doOpenStores);
+ static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
+ return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
}
/**
@@ -562,7 +607,8 @@
Directory dir,
SegmentInfo si,
int readBufferSize,
- boolean doOpenStores)
+ boolean doOpenStores,
+ int termInfosIndexDivisor)
throws CorruptIndexException, IOException {
SegmentReader instance;
try {
@@ -580,7 +626,7 @@
boolean success = false;
try {
- instance.core = new CoreReaders(dir, si, readBufferSize);
+ instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
if (doOpenStores) {
instance.core.openDocStores(si);
}
@@ -876,12 +922,12 @@
public TermEnum terms() {
ensureOpen();
- return core.tis.terms();
+ return core.getTermsReader().terms();
}
public TermEnum terms(Term t) throws IOException {
ensureOpen();
- return core.tis.terms(t);
+ return core.getTermsReader().terms(t);
}
FieldInfos fieldInfos() {
@@ -917,7 +963,7 @@
public int docFreq(Term t) throws IOException {
ensureOpen();
- TermInfo ti = core.tis.get(t);
+ TermInfo ti = core.getTermsReader().get(t);
if (ti != null)
return ti.docFreq;
else
@@ -937,14 +983,6 @@
return si.docCount;
}
- public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
- core.tis.setIndexDivisor(indexDivisor);
- }
-
- public int getTermInfosIndexDivisor() {
- return core.tis.getIndexDivisor();
- }
-
/**
* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
*/
@@ -1099,6 +1137,19 @@
}
}
+ boolean termsIndexLoaded() {
+ return core.termsIndexIsLoaded();
+ }
+
+ // NOTE: only called from IndexWriter when a near
+ // real-time reader is opened, or applyDeletes is run,
+ // sharing a segment that's still being merged. This
+ // method is not thread safe, and relies on the
+ // synchronization in IndexWriter
+ void loadTermsIndex(int termsIndexDivisor) throws IOException {
+ core.loadTermsIndex(si, termsIndexDivisor);
+ }
+
// for testing only
boolean normsClosed() {
if (singleNormStream != null) {
@@ -1268,9 +1319,9 @@
}
public long getUniqueTermCount() {
- return core.tis.size();
+ return core.getTermsReader().size();
}
/**
* Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 793040)
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy)
@@ -49,12 +49,12 @@
synchronized (parent) {
this.deletedDocs = parent.deletedDocs;
}
- this.skipInterval = parent.core.tis.getSkipInterval();
- this.maxSkipLevels = parent.core.tis.getMaxSkipLevels();
+ this.skipInterval = parent.core.getTermsReader().getSkipInterval();
+ this.maxSkipLevels = parent.core.getTermsReader().getMaxSkipLevels();
}
public void seek(Term term) throws IOException {
- TermInfo ti = parent.core.tis.get(term);
+ TermInfo ti = parent.core.getTermsReader().get(term);
seek(ti, term);
}
@@ -69,7 +69,7 @@
ti = segmentTermEnum.termInfo();
} else { // punt case
term = termEnum.term();
- ti = parent.core.tis.get(term);
+ ti = parent.core.getTermsReader().get(term);
}
seek(ti, term);
Index: src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java
===================================================================
--- src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (revision 793040)
+++ src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (working copy)
@@ -23,16 +23,17 @@
import java.util.Map;
class ReadOnlyDirectoryReader extends DirectoryReader {
- ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy) throws IOException {
- super(directory, sis, deletionPolicy, true);
+ ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) throws IOException {
+ super(directory, sis, deletionPolicy, true, termInfosIndexDivisor);
}
- ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone) throws IOException {
- super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone);
+ ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone,
+ int termInfosIndexDivisor) throws IOException {
+ super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor);
}
- ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException {
- super(writer, infos);
+ ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException {
+ super(writer, infos, termInfosIndexDivisor);
}
protected void acquireWriteLock() {
Index: src/java/org/apache/lucene/index/DirectoryReader.java
===================================================================
--- src/java/org/apache/lucene/index/DirectoryReader.java (revision 793040)
+++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy)
@@ -51,6 +51,7 @@
private Lock writeLock;
private SegmentInfos segmentInfos;
private boolean stale;
+ private final int termInfosIndexDivisor;
private boolean rollbackHasChanges;
private SegmentInfos rollbackSegmentInfos;
@@ -62,25 +63,27 @@
private int numDocs = -1;
private boolean hasDeletions = false;
- static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException {
+ static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
+ final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) {
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
if (readOnly)
- return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy);
+ return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor);
else
- return new DirectoryReader(directory, infos, deletionPolicy, false);
+ return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor);
}
}.run(commit);
}
/** Construct reading the named set of readers. */
- DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws IOException {
+ DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = sis;
this.deletionPolicy = deletionPolicy;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
@@ -97,7 +100,7 @@
for (int i = sis.size()-1; i >= 0; i--) {
boolean success = false;
try {
- readers[i] = SegmentReader.get(readOnly, sis.info(i));
+ readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
success = true;
} finally {
if (!success) {
@@ -117,10 +120,11 @@
}
// Used by near real-time search
- DirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException {
+ DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
this.segmentInfos = infos;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
// properly sync'd:
@@ -140,7 +144,7 @@
try {
final SegmentInfo info = infos.info(upto);
if (info.dir == dir) {
- readers[upto++] = writer.readerPool.getReadOnlyClone(info, true);
+ readers[upto++] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
}
success = true;
} finally {
@@ -171,10 +175,11 @@
/** This contructor is only used for {@link #reopen()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
- Map oldNormsCache, boolean readOnly, boolean doClone) throws IOException {
+ Map oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = infos;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
// properly sync'd:
@@ -218,7 +223,7 @@
assert !doClone;
// this is a new reader; in case we hit an exception we can close it safely
- newReader = SegmentReader.get(readOnly, infos.info(i));
+ newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
} else {
newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
}
@@ -426,10 +431,10 @@
private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
DirectoryReader reader;
- if (openReadOnly) {
- reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone);
+ if (openReadOnly) {
+ reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor);
} else {
- reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone);
+ reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor);
}
reader.setDisableFakeNorms(getDisableFakeNorms());
return reader;
@@ -827,18 +832,6 @@
return subReaders;
}
- public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
- for (int i = 0; i < subReaders.length; i++)
- subReaders[i].setTermInfosIndexDivisor(indexDivisor);
- }
-
- public int getTermInfosIndexDivisor() throws IllegalStateException {
- if (subReaders.length > 0)
- return subReaders[0].getTermInfosIndexDivisor();
- else
- throw new IllegalStateException("no readers");
- }
-
public void setDisableFakeNorms(boolean disableFakeNorms) {
super.setDisableFakeNorms(disableFakeNorms);
for (int i = 0; i < subReaders.length; i++)
Index: src/java/org/apache/lucene/index/TermInfosReader.java
===================================================================
--- src/java/org/apache/lucene/index/TermInfosReader.java (revision 793040)
+++ src/java/org/apache/lucene/index/TermInfosReader.java (working copy)
@@ -20,7 +20,6 @@
import java.io.IOException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.cache.Cache;
import org.apache.lucene.util.cache.SimpleLRUCache;
import org.apache.lucene.util.CloseableThreadLocal;
@@ -30,22 +29,19 @@
* set. */
final class TermInfosReader {
- private Directory directory;
- private String segment;
- private FieldInfos fieldInfos;
+ private final Directory directory;
+ private final String segment;
+ private final FieldInfos fieldInfos;
- private CloseableThreadLocal threadResources = new CloseableThreadLocal();
- private SegmentTermEnum origEnum;
- private long size;
+ private final CloseableThreadLocal threadResources = new CloseableThreadLocal();
+ private final SegmentTermEnum origEnum;
+ private final long size;
- private Term[] indexTerms = null;
- private TermInfo[] indexInfos;
- private long[] indexPointers;
+ private final Term[] indexTerms;
+ private final TermInfo[] indexInfos;
+ private final long[] indexPointers;
- private SegmentTermEnum indexEnum;
-
- private int indexDivisor = 1;
- private int totalIndexInterval;
+ private final int totalIndexInterval;
private final static int DEFAULT_CACHE_SIZE = 1024;
@@ -59,15 +55,14 @@
Cache termInfoCache;
}
- TermInfosReader(Directory dir, String seg, FieldInfos fis)
+ TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
throws CorruptIndexException, IOException {
- this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE);
- }
-
- TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize)
- throws CorruptIndexException, IOException {
boolean success = false;
+ if (indexDivisor < 1 && indexDivisor != -1) {
+ throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
+ }
+
try {
directory = dir;
segment = seg;
@@ -76,11 +71,40 @@
origEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_EXTENSION,
readBufferSize), fieldInfos, false);
size = origEnum.size;
- totalIndexInterval = origEnum.indexInterval;
- indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION,
- readBufferSize), fieldInfos, true);
+ if (indexDivisor != -1) {
+ // Load terms index
+ totalIndexInterval = origEnum.indexInterval * indexDivisor;
+ final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION,
+ readBufferSize), fieldInfos, true);
+
+ try {
+ int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index
+
+ indexTerms = new Term[indexSize];
+ indexInfos = new TermInfo[indexSize];
+ indexPointers = new long[indexSize];
+
+ for (int i = 0; indexEnum.next(); i++) {
+ indexTerms[i] = indexEnum.term();
+ indexInfos[i] = indexEnum.termInfo();
+ indexPointers[i] = indexEnum.indexPointer;
+
+ for (int j = 1; j < indexDivisor; j++)
+ if (!indexEnum.next())
+ break;
+ }
+ } finally {
+ indexEnum.close();
+ }
+ } else {
+ // Do not load terms index:
+ totalIndexInterval = -1;
+ indexTerms = null;
+ indexInfos = null;
+ indexPointers = null;
+ }
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
@@ -102,48 +126,9 @@
return origEnum.maxSkipLevels;
}
- /**
- *
Sets the indexDivisor, which subsamples the number - * of indexed terms loaded into memory. This has a - * similar effect as {@link - * IndexWriter#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1.
- * - * NOTE: you must call this before the term - * index is loaded. If the index is already loaded, - * an IllegalStateException is thrown. - * - + @throws IllegalStateException if the term index has - * already been loaded into memory. - */ - public void setIndexDivisor(int indexDivisor) throws IllegalStateException { - if (indexDivisor < 1) - throw new IllegalArgumentException("indexDivisor must be > 0: got " + indexDivisor); - - if (indexTerms != null) - throw new IllegalStateException("index terms are already loaded"); - - this.indexDivisor = indexDivisor; - totalIndexInterval = origEnum.indexInterval * indexDivisor; - } - - /** Returns the indexDivisor. - * @see #setIndexDivisor - */ - public int getIndexDivisor() { - return indexDivisor; - } - final void close() throws IOException { if (origEnum != null) origEnum.close(); - if (indexEnum != null) - indexEnum.close(); threadResources.close(); } @@ -164,31 +149,7 @@ return resources; } - private synchronized void ensureIndexIsRead() throws IOException { - if (indexTerms != null) // index already read - return; // do nothing - try { - int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index - indexTerms = new Term[indexSize]; - indexInfos = new TermInfo[indexSize]; - indexPointers = new long[indexSize]; - - for (int i = 0; indexEnum.next(); i++) { - indexTerms[i] = indexEnum.term(); - indexInfos[i] = indexEnum.termInfo(); - indexPointers[i] = indexEnum.indexPointer; - - for (int j = 1; j < indexDivisor; j++) - if (!indexEnum.next()) - break; - } - } finally { - indexEnum.close(); - indexEnum = null; - } - } - /** Returns the offset of the greatest index entry which is less than or equal to term.*/ private final int getIndexOffset(Term term) { int lo = 0; // binary search indexTerms[] @@ -223,7 +184,7 @@ if (size == 0) return null; ensureIndexIsRead(); - + TermInfo ti; ThreadResources resources = getThreadResources(); Cache cache = null; @@ -302,6 +263,12 @@ return enumerator.term(); } + 
private void ensureIndexIsRead() { + if (indexTerms == null) { + throw new IllegalStateException("terms index was not loaded when this reader was created"); + } + } + /** Returns the position of a Term in the set or -1. */ final long getPosition(Term term) throws IOException { if (size == 0) return -1; Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 793040) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -115,6 +115,8 @@ private int refCount; + static int DEFAULT_TERMS_INDEX_DIVISOR = 1; + private boolean disableFakeNorms = false; /** Expert: returns the current refCount for this reader */ @@ -228,7 +230,7 @@ final Directory dir = FSDirectory.getDirectory(path); IndexReader r = null; try { - r = open(dir, null, null, readOnly); + r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } finally { if (r == null) dir.close(); @@ -265,7 +267,7 @@ final Directory dir = FSDirectory.getDirectory(path); IndexReader r = null; try { - r = open(dir, null, null, readOnly); + r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } finally { if (r == null) dir.close(); @@ -282,7 +284,7 @@ * Use {@link #open(Directory, boolean)} instead */ public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { - return open(directory, null, null, false); + return open(directory, null, null, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Returns an IndexReader reading the index in the given @@ -296,7 +298,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, null, null, readOnly); + return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns a read/write IndexReader reading the index 
in the given @@ -308,7 +310,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, false); + return open(commit.getDirectory(), null, commit, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in the given @@ -322,7 +324,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, readOnly); + return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns a read/write IndexReader reading the index in the given @@ -337,7 +339,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, false); + return open(directory, deletionPolicy, null, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in @@ -355,9 +357,37 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, readOnly); + return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } + /** Expert: returns an IndexReader reading the index in + * the given Directory, with a custom {@link + * IndexDeletionPolicy}. You should pass readOnly=true, + * since it gives much better concurrent performance, + * unless you intend to do write operations (delete + * documents or change norms) with the reader. 
+ * @param directory the index directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + * @param termInfosIndexDivisor Subsambles which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); + } + /** Expert: returns a read/write IndexReader reading the index in the given * Directory, using a specific commit and with a custom * {@link IndexDeletionPolicy}. 
@@ -373,7 +403,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, false); + return open(commit.getDirectory(), deletionPolicy, commit, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in @@ -393,13 +423,43 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, readOnly); + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } - private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException { - return DirectoryReader.open(directory, deletionPolicy, commit, readOnly); + /** Expert: returns an IndexReader reading the index in + * the given Directory, using a specific commit and with + * a custom {@link IndexDeletionPolicy}. You should pass + * readOnly=true, since it gives much better concurrent + * performance, unless you intend to do write operations + * (delete documents or change norms) with the reader. + * @param commit the specific {@link IndexCommit} to open; + * see {@link IndexReader#listCommits} to list all commits + * in a directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. 
This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); } + private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); + } + /** * Refreshes an IndexReader if the index has changed since this instance * was (re)opened. @@ -668,17 +728,20 @@ * index is loaded. If the index is already loaded, * an IllegalStateException is thrown. * @throws IllegalStateException if the term index has already been loaded into memory + * @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. 
*/ public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException { - throw new UnsupportedOperationException("This reader does not support this method."); + throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader"); } /**For IndexReader implementations that use * TermInfosReader to read terms, this returns the * current indexDivisor. - * @see #setTermInfosIndexDivisor */ + * @see #setTermInfosIndexDivisor + * @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. + */ public int getTermInfosIndexDivisor() { - throw new UnsupportedOperationException("This reader does not support this method."); + throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader"); } /** Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 793040) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -425,6 +425,23 @@ * @throws IOException */ public IndexReader getReader() throws IOException { + return getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** Expert: like {@link #getReader}, except you can + * specify which termInfosIndexDivisor should be used for + * any newly opened readers. + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. 
Set this + * to -1 to skip loading the terms index entirely. */ + public IndexReader getReader(int termInfosIndexDivisor) throws IOException { if (infoStream != null) { message("flush at getReader"); } @@ -440,7 +457,7 @@ // reader; in theory we could do similar retry logic, // just like we do when loading segments_N synchronized(this) { - return new ReadOnlyDirectoryReader(this, segmentInfos); + return new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor); } } @@ -590,8 +607,8 @@ // Returns a ref to a clone. NOTE: this clone is not // enrolled in the pool, so you should simply close() // it when you're done (ie, do not call release()). - public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores) throws IOException { - SegmentReader sr = get(info, doOpenStores); + public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException { + SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor); try { return (SegmentReader) sr.clone(true); } finally { @@ -601,10 +618,10 @@ // Returns a ref public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException { - return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE); + return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } - public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize) throws IOException { + public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException { if (poolReaders) { readBufferSize = BufferedIndexInput.BUFFER_SIZE; @@ -615,10 +632,21 @@ // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: - sr = SegmentReader.get(info, readBufferSize, doOpenStores); + sr = SegmentReader.get(info, 
readBufferSize, doOpenStores, termsIndexDivisor); readerMap.put(info, sr); - } else if (doOpenStores) { - sr.openDocStores(); + } else { + if (doOpenStores) { + sr.openDocStores(); + } + if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) { + // If this reader was originally opened because we + // needed to merge it, we didn't load the terms + // index. But now, if the caller wants the terms + // index (eg because it's doing deletes, or an NRT + // reader is being opened) we ask the reader to + // load its terms index. + sr.loadTermsIndex(termsIndexDivisor); + } } // Return a ref to our caller @@ -3769,7 +3797,7 @@ SegmentReader sReader = null; synchronized(this) { if (segmentInfos.size() == 1) { // add existing index, if any - sReader = readerPool.get(segmentInfos.info(0), true); + sReader = readerPool.get(segmentInfos.info(0), true, BufferedIndexInput.BUFFER_SIZE, -1); } } @@ -4867,7 +4895,8 @@ // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.get(info, merge.mergeDocStores, - MERGE_READ_BUFFER_SIZE); + MERGE_READ_BUFFER_SIZE, + -1); // We clone the segment readers because other // deletes may come in while we're merging so we @@ -4923,7 +4952,7 @@ // keep deletes (it's costly to open entire reader // when we just need deletes) - final SegmentReader mergedReader = readerPool.get(merge.info, false); + final SegmentReader mergedReader = readerPool.get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, -1); try { if (poolReaders && mergedSegmentWarmer != null) { mergedSegmentWarmer.warm(mergedReader);