Index: src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 768446) +++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -87,6 +87,7 @@ public boolean next() throws IOException { return in.next(); } public Term term() { return in.term(); } public int docFreq() { return in.docFreq(); } + public int getLastDocId() { return in.getLastDocId(); } public void close() throws IOException { in.close(); } } Index: src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 768446) +++ src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -352,7 +352,7 @@ long skipPointer = skipListWriter.writeSkip(freqOut); // Write term - termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); + termInfo.set(df, lastDoc, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); // TODO: we could do this incrementally UnicodeUtil.UTF16toUTF8(text, start, termsUTF8); Index: src/java/org/apache/lucene/index/MultiSegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiSegmentReader.java (revision 768446) +++ src/java/org/apache/lucene/index/MultiSegmentReader.java (working copy) @@ -453,6 +453,7 @@ private Term term; private int docFreq; + private int maxDocId; /* largest value of top.base plus the sub-enum's getLastDocId() seen while merging the current term; reset to -1 before each merge */ public MultiTermEnum(IndexReader[] readers, int[] starts, Term t) throws IOException { @@ -487,10 +488,12 @@ term = top.term; docFreq = 0; + maxDocId = -1; while (top != null && term.compareTo(top.term) == 0) { queue.pop(); docFreq += top.termEnum.docFreq(); // increment freq + maxDocId = Math.max(maxDocId, top.base + top.termEnum.getLastDocId()); if (top.next()) queue.put(top); // restore queue else @@ -507,6 +510,10 
@@ public int docFreq() { return docFreq; } + + public int getLastDocId(){ + return maxDocId; + } public void close() throws IOException { queue.close(); Index: src/java/org/apache/lucene/index/ParallelReader.java =================================================================== --- src/java/org/apache/lucene/index/ParallelReader.java (revision 768446) +++ src/java/org/apache/lucene/index/ParallelReader.java (working copy) @@ -510,7 +510,14 @@ return termEnum.docFreq(); } + + public int getLastDocId() { + if (termEnum==null) + return 0; + return termEnum.getLastDocId(); + } + public void close() throws IOException { if (termEnum!=null) termEnum.close(); Index: src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- src/java/org/apache/lucene/index/SegmentMerger.java (revision 768446) +++ src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -579,18 +579,22 @@ else proxPointer = 0; - int df; + int[] termData; + int df,lastDoc; if (fieldInfos.fieldInfo(smis[0].term.field).omitTf) { // append posting data - df = appendPostingsNoTf(smis, n); + termData = appendPostingsNoTf(smis, n); } else{ - df = appendPostings(smis, n); + termData = appendPostings(smis, n); } + df = termData[0]; + lastDoc = termData[1]; + long skipPointer = skipListWriter.writeSkip(freqOutput); if (df > 0) { // add an entry to the dictionary with pointers to prox and freq files - termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); + termInfo.set(df, lastDoc, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); termInfosWriter.add(smis[0].term, termInfo); } @@ -617,8 +621,9 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - private final int appendPostings(SegmentMergeInfo[] smis, int n) + private final int[] appendPostings(SegmentMergeInfo[] smis, int n) throws CorruptIndexException, IOException { + int[] retVal = 
new int[2]; int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.resetSkip(); @@ -689,7 +694,9 @@ } } } - return df; + retVal[0]=df; /* slot 0: number of docs with the term */ + retVal[1]=lastDoc; /* slot 1: final value of lastDoc */ + return retVal; } /** Process postings from multiple segments without tf, all positioned on the @@ -701,8 +708,9 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - private final int appendPostingsNoTf(SegmentMergeInfo[] smis, int n) + private final int[] appendPostingsNoTf(SegmentMergeInfo[] smis, int n) throws CorruptIndexException, IOException { + int[] retVal = new int[2]; int lastDoc = 0; int df = 0; // number of docs w/ term skipListWriter.resetSkip(); @@ -736,7 +744,9 @@ freqOutput.writeVInt(docCode); // write doc & freq=1 } } - return df; + retVal[0]=df; /* slot 0: number of docs with the term */ + retVal[1]=lastDoc; /* slot 1: final value of lastDoc */ + return retVal; } private void mergeNorms() throws IOException { Index: src/java/org/apache/lucene/index/SegmentTermEnum.java =================================================================== --- src/java/org/apache/lucene/index/SegmentTermEnum.java (revision 768446) +++ src/java/org/apache/lucene/index/SegmentTermEnum.java (working copy) @@ -127,6 +127,7 @@ termBuffer.read(input, fieldInfos); termInfo.docFreq = input.readVInt(); // read doc freq + termInfo.maxDocId = input.readVInt(); /* NOTE(review): one more VInt is now consumed per term entry, right after the doc freq; confirm that files written without this field are detected by a format check, TODO */ termInfo.freqPointer += input.readVLong(); // read freq pointer termInfo.proxPointer += input.readVLong(); // read prox pointer @@ -189,6 +190,10 @@ public final int docFreq() { return termInfo.docFreq; } + + public final int getLastDocId(){ + return termInfo.maxDocId; + } /* Returns the freqPointer from the current TermInfo in the enumeration. 
Initially invalid, valid after next() called for the first time.*/ Index: src/java/org/apache/lucene/index/TermEnum.java =================================================================== --- src/java/org/apache/lucene/index/TermEnum.java (revision 768446) +++ src/java/org/apache/lucene/index/TermEnum.java (working copy) @@ -33,6 +33,9 @@ /** Returns the docFreq of the current Term in the enumeration.*/ public abstract int docFreq(); + + /** Returns the last document id in the postings of the current Term in the enumeration. */ + public abstract int getLastDocId(); /** Closes the enumeration to further activity, freeing resources. */ public abstract void close() throws IOException; Index: src/java/org/apache/lucene/index/TermInfo.java =================================================================== --- src/java/org/apache/lucene/index/TermInfo.java (revision 768446) +++ src/java/org/apache/lucene/index/TermInfo.java (working copy) @@ -22,6 +22,7 @@ final class TermInfo { /** The number of documents which contain the term. */ int docFreq = 0; + int maxDocId = -1; /* last document id in the term's postings; stays -1 until one is set */ long freqPointer = 0; long proxPointer = 0; @@ -29,22 +30,25 @@ TermInfo() {} - TermInfo(int df, long fp, long pp) { + TermInfo(int df, int maxId,long fp, long pp) { docFreq = df; + maxDocId = maxId; freqPointer = fp; proxPointer = pp; } TermInfo(TermInfo ti) { docFreq = ti.docFreq; + maxDocId = ti.maxDocId; freqPointer = ti.freqPointer; proxPointer = ti.proxPointer; skipOffset = ti.skipOffset; } - final void set(int docFreq, + final void set(int docFreq, int maxId, long freqPointer, long proxPointer, int skipOffset) { this.docFreq = docFreq; + this.maxDocId = maxId; this.freqPointer = freqPointer; this.proxPointer = proxPointer; this.skipOffset = skipOffset; @@ -52,6 +56,7 @@ final void set(TermInfo ti) { docFreq = ti.docFreq; + maxDocId = ti.maxDocId; freqPointer = ti.freqPointer; proxPointer = ti.proxPointer; skipOffset = ti.skipOffset; Index: src/java/org/apache/lucene/index/TermInfosWriter.java 
=================================================================== --- src/java/org/apache/lucene/index/TermInfosWriter.java (revision 768446) +++ src/java/org/apache/lucene/index/TermInfosWriter.java (working copy) @@ -171,6 +171,7 @@ writeTerm(fieldNumber, termBytes, termBytesLength); // write term output.writeVInt(ti.docFreq); // write doc freq + output.writeVInt(ti.maxDocId); /* write last doc id, directly after the doc freq */ output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.proxPointer - lastTi.proxPointer); Index: src/java/org/apache/lucene/search/FilteredTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermEnum.java (revision 768446) +++ src/java/org/apache/lucene/search/FilteredTermEnum.java (working copy) @@ -58,6 +58,11 @@ return actualEnum.docFreq(); } + public int getLastDocId() { + if (actualEnum == null) return -1; /* the actual enumerator is not initialized, so there is no last doc id to report */ + return actualEnum.getLastDocId(); + } + /** Increments the enumeration to the next element. True if one exists. */ public boolean next() throws IOException { if (actualEnum == null) return false; // the actual enumerator is not initialized! 
Index: src/test/org/apache/lucene/index/TestSegmentTermEnum.java =================================================================== --- src/test/org/apache/lucene/index/TestSegmentTermEnum.java (revision 739099) +++ src/test/org/apache/lucene/index/TestSegmentTermEnum.java (working copy) @@ -47,14 +47,17 @@ // add 100 documents with terms: aaa bbb // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100 for (int i = 0; i < 100; i++) { + addDoc(writer, "aaa bbb"); addDoc(writer, "aaa"); - addDoc(writer, "aaa bbb"); } writer.close(); // verify document frequency of terms in an unoptimized index verifyDocFreq(); + + // verify last document id + verifyLastDocId(); // merge segments by optimizing the index writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED); @@ -82,11 +85,11 @@ assertEquals("bbb", termEnum.prev().text()); } + private void verifyDocFreq() - throws IOException - { - IndexReader reader = IndexReader.open(dir); - TermEnum termEnum = null; + throws IOException{ + IndexReader reader = IndexReader.open(dir); + TermEnum termEnum = null; // create enumeration of all terms termEnum = reader.terms(); @@ -118,6 +121,42 @@ termEnum.close(); } + private void verifyLastDocId() throws IOException + { + IndexReader reader = IndexReader.open(dir); + TermEnum termEnum = null; + + // create enumeration of all terms + termEnum = reader.terms(); + // go to the first term (aaa) + termEnum.next(); + // assert that term is 'aaa' + assertEquals("aaa", termEnum.term().text()); + assertEquals(199, termEnum.getLastDocId()); /* documents are added alternately as "aaa bbb" then "aaa", so the 200th and final document holds only 'aaa'; the expected value presumes ids run from 0 in insertion order */ + // go to the second term (bbb) + termEnum.next(); + // assert that term is 'bbb' + assertEquals("bbb", termEnum.term().text()); + assertEquals(198, termEnum.getLastDocId()); /* 'bbb' last appears in the document added just before the final one */ + + termEnum.close(); + + + // create enumeration of terms after term 'aaa', including 'aaa' + termEnum = reader.terms(new Term("content", "aaa")); + // assert that term is 'aaa' + assertEquals("aaa", termEnum.term().text()); 
+ assertEquals(199, termEnum.getLastDocId()); + // go to term 'bbb' + termEnum.next(); + // assert that term is 'bbb' + assertEquals("bbb", termEnum.term().text()); + assertEquals(198, termEnum.getLastDocId()); + + termEnum.close(); + reader.close(); /* release the reader opened at the top of this method */ + } + private void addDoc(IndexWriter writer, String value) throws IOException { Document doc = new Document();