Index: src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexReader.java (revision 791535)
+++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -1800,4 +1800,40 @@
writer.close();
dir.close();
}
+
+ // LUCENE-1609: don't load terms index
+ public void testNoTermsIndex() throws Throwable {
+ Directory dir = new MockRAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ Document doc = new Document();
+ doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
+ doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ writer.addDocument(doc);
+ writer.close();
+
+ IndexReader r = IndexReader.open(dir, null, true, -1);
+ try {
+ r.docFreq(new Term("field", "f"));
+ fail("did not hit expected exception");
+ } catch (IllegalStateException ise) {
+ // expected
+ }
+ assertFalse(((SegmentReader) r.getSequentialSubReaders()[0]).termsIndexLoaded());
+
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+ writer.addDocument(doc);
+ writer.close();
+
+ // LUCENE-1718: ensure re-open carries over no terms index:
+ IndexReader r2 = r.reopen();
+ r.close();
+ IndexReader[] subReaders = r2.getSequentialSubReaders();
+ assertEquals(2, subReaders.length);
+ for(int i=0;i<2;i++) {
+ assertFalse(((SegmentReader) subReaders[i]).termsIndexLoaded());
+ }
+ r2.close();
+ dir.close();
+ }
}
Index: src/test/org/apache/lucene/index/TestSegmentTermDocs.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 791535)
+++ src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy)
@@ -53,8 +53,7 @@
public void testTermDocs(int indexDivisor) throws IOException {
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(info);
- reader.setTermInfosIndexDivisor(indexDivisor);
+ SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -76,8 +75,7 @@
public void testBadSeek(int indexDivisor) throws IOException {
{
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(info);
- reader.setTermInfosIndexDivisor(indexDivisor);
+ SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -87,8 +85,7 @@
}
{
//After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(info);
- reader.setTermInfosIndexDivisor(indexDivisor);
+ SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@@ -123,9 +120,7 @@
writer.optimize();
writer.close();
- IndexReader reader = IndexReader.open(dir);
- reader.setTermInfosIndexDivisor(indexDivisor);
- assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());
+ IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);
TermDocs tdocs = reader.termDocs();
@@ -239,21 +234,6 @@
testBadSeek(2);
testSkipTo(2);
}
-
- public void testIndexDivisorAfterLoad() throws IOException {
- dir = new MockRAMDirectory();
- testDoc = new Document();
- DocHelper.setupDoc(testDoc);
- SegmentInfo si = DocHelper.writeDoc(dir, testDoc);
- SegmentReader reader = SegmentReader.get(si);
- assertEquals(1, reader.docFreq(new Term("keyField", "Keyword")));
- try {
- reader.setTermInfosIndexDivisor(2);
- fail("did not hit IllegalStateException exception");
- } catch (IllegalStateException ise) {
- // expected
- }
- }
private void addDoc(IndexWriter writer, String value) throws IOException
{
Index: src/test/org/apache/lucene/index/TestSegmentReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestSegmentReader.java (revision 791535)
+++ src/test/org/apache/lucene/index/TestSegmentReader.java (working copy)
@@ -203,20 +203,4 @@
assertTrue(results != null);
assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
}
-
- public void testIndexDivisor() throws IOException {
- dir = new MockRAMDirectory();
- testDoc = new Document();
- DocHelper.setupDoc(testDoc);
- SegmentInfo si = DocHelper.writeDoc(dir, testDoc);
-
- reader = SegmentReader.get(si);
- reader.setTermInfosIndexDivisor(3);
- testDocument();
- testDelete();
- testGetFieldNameVariations();
- testNorms();
- testTerms();
- testTermVectors();
- }
}
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 791535)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -50,7 +50,7 @@
FieldInfos fieldInfos;
private FieldsReader fieldsReaderOrig = null;
CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal();
- TermInfosReader tis;
+ PrivateTermsDict terms;
TermVectorsReader termVectorsReaderOrig = null;
CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal();
@@ -83,6 +83,51 @@
CompoundFileReader cfsReader = null;
CompoundFileReader storeCFSReader = null;
+ // Holds an index-loaded TermInfosReader (reader) and/or a
+ // no-index-loaded TermInfosReader (readerNoIndex); getReader()
+ // prefers the index-loaded one. Both are needed at once only
+ // by IndexWriter, when a segment is first opened for merging
+ // and then an NRT reader (or applyDeletes) needs to open it
+ // for searching while sharing the same [pooled] reader.
+ private static final class PrivateTermsDict {
+ private TermInfosReader reader;
+ private TermInfosReader readerNoIndex;
+
+ PrivateTermsDict(TermInfosReader reader, TermInfosReader readerNoIndex) {
+ this.reader = reader;
+ this.readerNoIndex = readerNoIndex;
+ }
+
+ synchronized void setReader(TermInfosReader r) {
+ this.reader = r;
+ }
+
+ synchronized TermInfosReader getReader() {
+ if (reader != null) {
+ return reader;
+ } else {
+ return readerNoIndex;
+ }
+ }
+
+ synchronized void close() throws IOException {
+ if (reader != null) {
+ reader.close();
+ // null so if an app hangs on to us we still free most ram
+ reader = null;
+ }
+ if (readerNoIndex != null) {
+ readerNoIndex.close();
+ // null so if an app hangs on to us we still free most ram
+ readerNoIndex = null;
+ }
+ }
+
+ synchronized boolean indexIsLoaded() {
+ return reader != null;
+ }
+ }
+
/**
* Sets the initial value
*/
@@ -382,15 +427,15 @@
* @deprecated
*/
public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
- return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true);
+ return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
- public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException {
- return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true);
+ public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
+ return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
}
/**
@@ -398,8 +443,8 @@
* @throws IOException if there is a low-level IO error
* @deprecated
*/
- static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
- return get(false, si.dir, si, readBufferSize, doOpenStores);
+ static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
+ return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
}
/**
@@ -410,7 +455,8 @@
Directory dir,
SegmentInfo si,
int readBufferSize,
- boolean doOpenStores)
+ boolean doOpenStores,
+ int termInfosIndexDivisor)
throws CorruptIndexException, IOException {
SegmentReader instance;
try {
@@ -449,7 +495,12 @@
if (!instance.fieldInfos.fieldInfo(i).omitTermFreqAndPositions)
anyProx = true;
- instance.tis = new TermInfosReader(cfsDir, instance.segment, instance.fieldInfos, readBufferSize);
+ TermInfosReader reader = new TermInfosReader(cfsDir, instance.segment, instance.fieldInfos, readBufferSize, termInfosIndexDivisor);
+ if (termInfosIndexDivisor == -1) {
+ instance.terms = new PrivateTermsDict(null, reader);
+ } else {
+ instance.terms = new PrivateTermsDict(reader, null);
+ }
instance.loadDeletedDocs();
@@ -633,7 +684,7 @@
clone.storeCFSReader = storeCFSReader;
clone.fieldInfos = fieldInfos;
- clone.tis = tis;
+ clone.terms = terms;
clone.freqStream = freqStream;
clone.proxStream = proxStream;
clone.termVectorsReaderOrig = termVectorsReaderOrig;
@@ -756,11 +807,10 @@
if (coreRef.decRef() == 0) {
- // close everything, nothing is shared anymore with other readers
- if (tis != null) {
- tis.close();
- // null so if an app hangs on to us we still free most ram
- tis = null;
+ // close everything, nothing is shared anymore with
+ // other readers
+ if (terms != null) {
+ terms.close();
}
if (freqStream != null)
@@ -841,12 +891,12 @@
public TermEnum terms() {
ensureOpen();
- return tis.terms();
+ return getTermInfosReader().terms();
}
public TermEnum terms(Term t) throws IOException {
ensureOpen();
- return tis.terms(t);
+ return getTermInfosReader().terms(t);
}
FieldInfos getFieldInfos() {
@@ -882,7 +932,7 @@
public int docFreq(Term t) throws IOException {
ensureOpen();
- TermInfo ti = tis.get(t);
+ TermInfo ti = getTermInfosReader().get(t);
if (ti != null)
return ti.docFreq;
else
@@ -902,14 +952,6 @@
return si.docCount;
}
- public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
- tis.setIndexDivisor(indexDivisor);
- }
-
- public int getTermInfosIndexDivisor() {
- return tis.getIndexDivisor();
- }
-
/**
* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
*/
@@ -1064,6 +1106,35 @@
}
}
+ boolean termsIndexLoaded() {
+ return terms.indexIsLoaded();
+ }
+
+ // NOTE: only called from IndexWriter when a near
+ // real-time reader is opened, or applyDeletes is run,
+ // sharing a segment that's still being merged. This
+ // method is not thread safe, and relies on the
+ // synchronization in IndexWriter
+ void loadTermsIndex(int termsIndexDivisor) throws IOException {
+ if (!terms.indexIsLoaded()) {
+ Directory dir;
+ if (si.getUseCompoundFile()) {
+ // In some cases, we were originally opened when CFS
+ // was not used, but by the time we are asked to open
+ // the terms reader with index, the segment has
+ // switched to CFS
+ if (cfsReader == null) {
+ cfsReader = new CompoundFileReader(directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
+ }
+ dir = cfsReader;
+ } else {
+ dir = directory();
+ }
+
+ terms.setReader(new TermInfosReader(dir, segment, fieldInfos, readBufferSize, termsIndexDivisor));
+ }
+ }
+
// for testing only
boolean normsClosed() {
if (singleNormStream != null) {
@@ -1235,9 +1306,13 @@
}
public long getUniqueTermCount() {
- return tis.size();
+ return getTermInfosReader().size();
}
+ final TermInfosReader getTermInfosReader() {
+ return terms.getReader();
+ }
+
/**
* Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 791535)
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy)
@@ -49,12 +49,12 @@
synchronized (parent) {
this.deletedDocs = parent.deletedDocs;
}
- this.skipInterval = parent.tis.getSkipInterval();
- this.maxSkipLevels = parent.tis.getMaxSkipLevels();
+ this.skipInterval = parent.getTermInfosReader().getSkipInterval();
+ this.maxSkipLevels = parent.getTermInfosReader().getMaxSkipLevels();
}
public void seek(Term term) throws IOException {
- TermInfo ti = parent.tis.get(term);
+ TermInfo ti = parent.getTermInfosReader().get(term);
seek(ti, term);
}
@@ -69,7 +69,7 @@
ti = segmentTermEnum.termInfo();
} else { // punt case
term = termEnum.term();
- ti = parent.tis.get(term);
+ ti = parent.getTermInfosReader().get(term);
}
seek(ti, term);
Index: src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java
===================================================================
--- src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (revision 791535)
+++ src/java/org/apache/lucene/index/ReadOnlyDirectoryReader.java (working copy)
@@ -23,16 +23,17 @@
import java.util.Map;
class ReadOnlyDirectoryReader extends DirectoryReader {
- ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy) throws IOException {
- super(directory, sis, deletionPolicy, true);
+ ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) throws IOException {
+ super(directory, sis, deletionPolicy, true, termInfosIndexDivisor);
}
- ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone) throws IOException {
- super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone);
+ ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone,
+ int termInfosIndexDivisor) throws IOException {
+ super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor);
}
- ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException {
- super(writer, infos);
+ ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException {
+ super(writer, infos, termInfosIndexDivisor);
}
protected void acquireWriteLock() {
Index: src/java/org/apache/lucene/index/DirectoryReader.java
===================================================================
--- src/java/org/apache/lucene/index/DirectoryReader.java (revision 791535)
+++ src/java/org/apache/lucene/index/DirectoryReader.java (working copy)
@@ -51,6 +51,7 @@
private Lock writeLock;
private SegmentInfos segmentInfos;
private boolean stale;
+ private final int termInfosIndexDivisor;
private boolean rollbackHasChanges;
private SegmentInfos rollbackSegmentInfos;
@@ -62,25 +63,27 @@
private int numDocs = -1;
private boolean hasDeletions = false;
- static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException {
+ static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
+ final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) {
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
if (readOnly)
- return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy);
+ return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor);
else
- return new DirectoryReader(directory, infos, deletionPolicy, false);
+ return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor);
}
}.run(commit);
}
/** Construct reading the named set of readers. */
- DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws IOException {
+ DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = sis;
this.deletionPolicy = deletionPolicy;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
@@ -97,7 +100,7 @@
for (int i = sis.size()-1; i >= 0; i--) {
boolean success = false;
try {
- readers[i] = SegmentReader.get(readOnly, sis.info(i));
+ readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
success = true;
} finally {
if (!success) {
@@ -117,10 +120,11 @@
}
// Used by near real-time search
- DirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException {
+ DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
this.segmentInfos = infos;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
// properly sync'd:
@@ -140,7 +144,7 @@
try {
final SegmentInfo info = infos.info(upto);
if (info.dir == dir) {
- readers[upto++] = writer.readerPool.getReadOnlyClone(info, true);
+ readers[upto++] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
}
success = true;
} finally {
@@ -171,10 +175,11 @@
/** This contructor is only used for {@link #reopen()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
- Map oldNormsCache, boolean readOnly, boolean doClone) throws IOException {
+ Map oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = infos;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
// properly sync'd:
@@ -218,7 +223,7 @@
assert !doClone;
// this is a new reader; in case we hit an exception we can close it safely
- newReader = SegmentReader.get(readOnly, infos.info(i));
+ newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
} else {
newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
}
@@ -426,10 +431,10 @@
private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
DirectoryReader reader;
- if (openReadOnly) {
- reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone);
+ if (openReadOnly) {
+ reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor);
} else {
- reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone);
+ reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor);
}
reader.setDisableFakeNorms(getDisableFakeNorms());
return reader;
@@ -827,18 +832,6 @@
return subReaders;
}
- public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
- for (int i = 0; i < subReaders.length; i++)
- subReaders[i].setTermInfosIndexDivisor(indexDivisor);
- }
-
- public int getTermInfosIndexDivisor() throws IllegalStateException {
- if (subReaders.length > 0)
- return subReaders[0].getTermInfosIndexDivisor();
- else
- throw new IllegalStateException("no readers");
- }
-
public void setDisableFakeNorms(boolean disableFakeNorms) {
super.setDisableFakeNorms(disableFakeNorms);
for (int i = 0; i < subReaders.length; i++)
Index: src/java/org/apache/lucene/index/TermInfosReader.java
===================================================================
--- src/java/org/apache/lucene/index/TermInfosReader.java (revision 791535)
+++ src/java/org/apache/lucene/index/TermInfosReader.java (working copy)
@@ -20,7 +20,6 @@
import java.io.IOException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.cache.Cache;
import org.apache.lucene.util.cache.SimpleLRUCache;
import org.apache.lucene.util.CloseableThreadLocal;
@@ -30,22 +29,19 @@
* set. */
final class TermInfosReader {
- private Directory directory;
- private String segment;
- private FieldInfos fieldInfos;
+ private final Directory directory;
+ private final String segment;
+ private final FieldInfos fieldInfos;
- private CloseableThreadLocal threadResources = new CloseableThreadLocal();
- private SegmentTermEnum origEnum;
- private long size;
+ private final CloseableThreadLocal threadResources = new CloseableThreadLocal();
+ private final SegmentTermEnum origEnum;
+ private final long size;
- private Term[] indexTerms = null;
- private TermInfo[] indexInfos;
- private long[] indexPointers;
+ private final Term[] indexTerms;
+ private final TermInfo[] indexInfos;
+ private final long[] indexPointers;
- private SegmentTermEnum indexEnum;
-
- private int indexDivisor = 1;
- private int totalIndexInterval;
+ private final int totalIndexInterval;
private final static int DEFAULT_CACHE_SIZE = 1024;
@@ -59,15 +55,14 @@
Cache termInfoCache;
}
- TermInfosReader(Directory dir, String seg, FieldInfos fis)
+ TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
throws CorruptIndexException, IOException {
- this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE);
- }
-
- TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize)
- throws CorruptIndexException, IOException {
boolean success = false;
+ if (indexDivisor < 1 && indexDivisor != -1) {
+ throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
+ }
+
try {
directory = dir;
segment = seg;
@@ -76,11 +71,40 @@
origEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_EXTENSION,
readBufferSize), fieldInfos, false);
size = origEnum.size;
- totalIndexInterval = origEnum.indexInterval;
- indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION,
- readBufferSize), fieldInfos, true);
+ if (indexDivisor != -1) {
+ // Load terms index
+ totalIndexInterval = origEnum.indexInterval * indexDivisor;
+ final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION,
+ readBufferSize), fieldInfos, true);
+
+ try {
+ int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index
+
+ indexTerms = new Term[indexSize];
+ indexInfos = new TermInfo[indexSize];
+ indexPointers = new long[indexSize];
+
+ for (int i = 0; indexEnum.next(); i++) {
+ indexTerms[i] = indexEnum.term();
+ indexInfos[i] = indexEnum.termInfo();
+ indexPointers[i] = indexEnum.indexPointer;
+
+ for (int j = 1; j < indexDivisor; j++)
+ if (!indexEnum.next())
+ break;
+ }
+ } finally {
+ indexEnum.close();
+ }
+ } else {
+ // Do not load terms index:
+ totalIndexInterval = -1;
+ indexTerms = null;
+ indexInfos = null;
+ indexPointers = null;
+ }
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
@@ -102,48 +126,9 @@
return origEnum.maxSkipLevels;
}
- /**
- *
Sets the indexDivisor, which subsamples the number - * of indexed terms loaded into memory. This has a - * similar effect as {@link - * IndexWriter#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1.
- * - * NOTE: you must call this before the term - * index is loaded. If the index is already loaded, - * an IllegalStateException is thrown. - * - + @throws IllegalStateException if the term index has - * already been loaded into memory. - */ - public void setIndexDivisor(int indexDivisor) throws IllegalStateException { - if (indexDivisor < 1) - throw new IllegalArgumentException("indexDivisor must be > 0: got " + indexDivisor); - - if (indexTerms != null) - throw new IllegalStateException("index terms are already loaded"); - - this.indexDivisor = indexDivisor; - totalIndexInterval = origEnum.indexInterval * indexDivisor; - } - - /** Returns the indexDivisor. - * @see #setIndexDivisor - */ - public int getIndexDivisor() { - return indexDivisor; - } - final void close() throws IOException { if (origEnum != null) origEnum.close(); - if (indexEnum != null) - indexEnum.close(); threadResources.close(); } @@ -164,31 +149,7 @@ return resources; } - private synchronized void ensureIndexIsRead() throws IOException { - if (indexTerms != null) // index already read - return; // do nothing - try { - int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index - indexTerms = new Term[indexSize]; - indexInfos = new TermInfo[indexSize]; - indexPointers = new long[indexSize]; - - for (int i = 0; indexEnum.next(); i++) { - indexTerms[i] = indexEnum.term(); - indexInfos[i] = indexEnum.termInfo(); - indexPointers[i] = indexEnum.indexPointer; - - for (int j = 1; j < indexDivisor; j++) - if (!indexEnum.next()) - break; - } - } finally { - indexEnum.close(); - indexEnum = null; - } - } - /** Returns the offset of the greatest index entry which is less than or equal to term.*/ private final int getIndexOffset(Term term) { int lo = 0; // binary search indexTerms[] @@ -223,7 +184,7 @@ if (size == 0) return null; ensureIndexIsRead(); - + TermInfo ti; ThreadResources resources = getThreadResources(); Cache cache = null; @@ -302,6 +263,12 @@ return enumerator.term(); } + 
private void ensureIndexIsRead() { + if (indexTerms == null) { + throw new IllegalStateException("terms index was not loaded when this reader was created"); + } + } + /** Returns the position of a Term in the set or -1. */ final long getPosition(Term term) throws IOException { if (size == 0) return -1; Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 791535) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -115,6 +115,8 @@ private int refCount; + static int DEFAULT_TERMS_INDEX_DIVISOR = 1; + private boolean disableFakeNorms = false; /** Expert: returns the current refCount for this reader */ @@ -228,7 +230,7 @@ final Directory dir = FSDirectory.getDirectory(path); IndexReader r = null; try { - r = open(dir, null, null, readOnly); + r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } finally { if (r == null) dir.close(); @@ -265,7 +267,7 @@ final Directory dir = FSDirectory.getDirectory(path); IndexReader r = null; try { - r = open(dir, null, null, readOnly); + r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } finally { if (r == null) dir.close(); @@ -282,7 +284,7 @@ * Use {@link #open(Directory, boolean)} instead */ public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { - return open(directory, null, null, false); + return open(directory, null, null, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Returns an IndexReader reading the index in the given @@ -296,7 +298,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, null, null, readOnly); + return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns a read/write IndexReader reading the index 
in the given @@ -308,7 +310,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, false); + return open(commit.getDirectory(), null, commit, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in the given @@ -322,7 +324,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), null, commit, readOnly); + return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns a read/write IndexReader reading the index in the given @@ -337,7 +339,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, false); + return open(directory, deletionPolicy, null, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in @@ -355,9 +357,37 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, deletionPolicy, null, readOnly); + return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } + /** Expert: returns an IndexReader reading the index in + * the given Directory, with a custom {@link + * IndexDeletionPolicy}. You should pass readOnly=true, + * since it gives much better concurrent performance, + * unless you intend to do write operations (delete + * documents or change norms) with the reader. 
+ * @param directory the index directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); + } + /** Expert: returns a read/write IndexReader reading the index in the given * Directory, using a specific commit and with a custom * {@link IndexDeletionPolicy}. 
@@ -373,7 +403,7 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, false); + return open(commit.getDirectory(), deletionPolicy, commit, false, DEFAULT_TERMS_INDEX_DIVISOR); } /** Expert: returns an IndexReader reading the index in @@ -393,13 +423,43 @@ * @throws IOException if there is a low-level IO error */ public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), deletionPolicy, commit, readOnly); + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } - private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException { - return DirectoryReader.open(directory, deletionPolicy, commit, readOnly); + /** Expert: returns an IndexReader reading the index in + * the given Directory, using a specific commit and with + * a custom {@link IndexDeletionPolicy}. You should pass + * readOnly=true, since it gives much better concurrent + * performance, unless you intend to do write operations + * (delete documents or change norms) with the reader. + * @param commit the specific {@link IndexCommit} to open; + * see {@link IndexReader#listCommits} to list all commits + * in a directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. 
This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); } + private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); + } + /** * Refreshes an IndexReader if the index has changed since this instance * was (re)opened. @@ -668,17 +728,20 @@ * index is loaded. If the index is already loaded, * an IllegalStateException is thrown. * @throws IllegalStateException if the term index has already been loaded into memory + * @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. 
*/ public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException { - throw new UnsupportedOperationException("This reader does not support this method."); + throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader"); } /**For IndexReader implementations that use * TermInfosReader to read terms, this returns the * current indexDivisor. - * @see #setTermInfosIndexDivisor */ + * @see #setTermInfosIndexDivisor + * @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead. + */ public int getTermInfosIndexDivisor() { - throw new UnsupportedOperationException("This reader does not support this method."); + throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader"); } /** Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 791535) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -425,6 +425,23 @@ * @throws IOException */ public IndexReader getReader() throws IOException { + return getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + } + + /** Expert: like {@link #getReader}, except you can + * specify which termInfosIndexDivisor should be used for + * any newly opened readers. + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriter#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. 
Set this + * to -1 to skip loading the terms index entirely. */ + public IndexReader getReader(int termInfosIndexDivisor) throws IOException { if (infoStream != null) { message("flush at getReader"); } @@ -440,7 +457,7 @@ // reader; in theory we could do similar retry logic, // just like we do when loading segments_N synchronized(this) { - return new ReadOnlyDirectoryReader(this, segmentInfos); + return new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor); } } @@ -590,8 +607,8 @@ // Returns a ref to a clone. NOTE: this clone is not // enrolled in the pool, so you should simply close() // it when you're done (ie, do not call release()). - public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores) throws IOException { - SegmentReader sr = get(info, doOpenStores); + public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException { + SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor); try { return (SegmentReader) sr.clone(true); } finally { @@ -601,10 +618,10 @@ // Returns a ref public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException { - return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE); + return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } - public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize) throws IOException { + public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException { if (poolReaders) { readBufferSize = BufferedIndexInput.BUFFER_SIZE; @@ -615,10 +632,21 @@ // TODO: we may want to avoid doing this while // synchronized // Returns a ref, which we xfer to readerMap: - sr = SegmentReader.get(info, readBufferSize, doOpenStores); + sr = SegmentReader.get(info, 
readBufferSize, doOpenStores, termsIndexDivisor); readerMap.put(info, sr); - } else if (doOpenStores) { - sr.openDocStores(); + } else { + if (doOpenStores) { + sr.openDocStores(); + } + if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) { + // If this reader was originally opened because we + // needed to merge it, we didn't load the terms + // index. But now, if the caller wants the terms + // index (eg because it's doing deletes, or an NRT + // reader is being opened) we ask the reader to + // load its terms index. + sr.loadTermsIndex(termsIndexDivisor); + } } // Return a ref to our caller @@ -4853,7 +4881,8 @@ // Hold onto the "live" reader; we will use this to // commit merged deletes SegmentReader reader = merge.readers[i] = readerPool.get(info, merge.mergeDocStores, - MERGE_READ_BUFFER_SIZE); + MERGE_READ_BUFFER_SIZE, + -1); // We clone the segment readers because other // deletes may come in while we're merging so we Index: tags/lucene_2_4_back_compat_tests_20090704/src/test/org/apache/lucene/index/TestSegmentTermDocs.java =================================================================== --- tags/lucene_2_4_back_compat_tests_20090704/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (revision 791569) +++ tags/lucene_2_4_back_compat_tests_20090704/src/test/org/apache/lucene/index/TestSegmentTermDocs.java (working copy) @@ -53,8 +53,7 @@ public void testTermDocs(int indexDivisor) throws IOException { //After adding the document, we should be able to read it back in - SegmentReader reader = SegmentReader.get(info); - reader.setTermInfosIndexDivisor(indexDivisor); + SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -76,8 +75,7 @@ public void testBadSeek(int indexDivisor) throws IOException { { //After adding the document, we should be able to read it back in - SegmentReader reader = SegmentReader.get(info); - 
reader.setTermInfosIndexDivisor(indexDivisor); + SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -87,8 +85,7 @@ } { //After adding the document, we should be able to read it back in - SegmentReader reader = SegmentReader.get(info); - reader.setTermInfosIndexDivisor(indexDivisor); + SegmentReader reader = SegmentReader.get(true, info, indexDivisor); assertTrue(reader != null); SegmentTermDocs segTermDocs = new SegmentTermDocs(reader); assertTrue(segTermDocs != null); @@ -123,9 +120,7 @@ writer.optimize(); writer.close(); - IndexReader reader = IndexReader.open(dir); - reader.setTermInfosIndexDivisor(indexDivisor); - assertEquals(indexDivisor, reader.getTermInfosIndexDivisor()); + IndexReader reader = IndexReader.open(dir, null, true, indexDivisor); TermDocs tdocs = reader.termDocs(); @@ -240,21 +235,6 @@ testSkipTo(2); } - public void testIndexDivisorAfterLoad() throws IOException { - dir = new MockRAMDirectory(); - testDoc = new Document(); - DocHelper.setupDoc(testDoc); - SegmentInfo si = DocHelper.writeDoc(dir, testDoc); - SegmentReader reader = SegmentReader.get(si); - assertEquals(1, reader.docFreq(new Term("keyField", "Keyword"))); - try { - reader.setTermInfosIndexDivisor(2); - fail("did not hit IllegalStateException exception"); - } catch (IllegalStateException ise) { - // expected - } - } - private void addDoc(IndexWriter writer, String value) throws IOException { Document doc = new Document(); Index: tags/lucene_2_4_back_compat_tests_20090704/src/test/org/apache/lucene/index/TestSegmentReader.java =================================================================== --- tags/lucene_2_4_back_compat_tests_20090704/src/test/org/apache/lucene/index/TestSegmentReader.java (revision 791569) +++ tags/lucene_2_4_back_compat_tests_20090704/src/test/org/apache/lucene/index/TestSegmentReader.java (working copy) @@ -200,20 +200,4 
@@ assertTrue(results != null); assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4); } - - public void testIndexDivisor() throws IOException { - dir = new MockRAMDirectory(); - testDoc = new Document(); - DocHelper.setupDoc(testDoc); - SegmentInfo si = DocHelper.writeDoc(dir, testDoc); - - reader = SegmentReader.get(si); - reader.setTermInfosIndexDivisor(3); - testDocument(); - testDelete(); - testGetFieldNameVariations(); - testNorms(); - testTerms(); - testTermVectors(); - } } Index: tags/lucene_2_4_back_compat_tests_20090704/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- tags/lucene_2_4_back_compat_tests_20090704/src/java/org/apache/lucene/index/SegmentReader.java (revision 791569) +++ tags/lucene_2_4_back_compat_tests_20090704/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -228,6 +228,11 @@ return get(readOnly, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true); } + public static SegmentReader get(boolean readOnly, SegmentInfo si, int termsIndexDivisor) throws CorruptIndexException, IOException { + // stub + return null; + } + /** * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error Index: tags/lucene_2_4_back_compat_tests_20090704/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- tags/lucene_2_4_back_compat_tests_20090704/src/java/org/apache/lucene/index/IndexReader.java (revision 791569) +++ tags/lucene_2_4_back_compat_tests_20090704/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -279,6 +279,11 @@ return open(directory, false, deletionPolicy, null, readOnly); } + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + // 
stub + return null; + } + /** Expert: returns a read/write IndexReader reading the index in the given * Directory, using a specific commit and with a custom * {@link IndexDeletionPolicy}. NOTE: starting in @@ -312,6 +317,11 @@ return open(commit.getDirectory(), false, deletionPolicy, commit, readOnly); } + public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException { + // stub + return null; + } + private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException { return DirectoryIndexReader.open(directory, closeDirectory, deletionPolicy, commit, readOnly); }