Index: lucene/CHANGES.txt
--- lucene/CHANGES.txt Tue Jun 21 04:53:16 2011 -0400
+++ lucene/CHANGES.txt Fri Jun 24 06:43:32 2011 -0400
@@ -281,6 +281,13 @@
* LUCENE-2953: In addition to changes in 3.x, PriorityQueue#initialize(int)
function was moved into the ctor. (Uwe Schindler, Yonik Seeley)
+* LUCENE-3225: Add TermsEnum.seekExact for faster seeking when you
+ don't need the ceiling term; renamed existing seek methods to either
+ seekCeil or seekExact; changed seekExact(ord) to return no value.
+ Fixed MemoryCodec and SimpleTextCodec to optimize the seekExact
+ case, and fixed places in Lucene to use seekExact when possible.
+ (Mike McCandless)
+
New features
* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Fri Jun 24 06:43:32 2011 -0400
@@ -44,7 +44,20 @@
}
@Override
- public SeekStatus seek(BytesRef text, boolean useCache) {
+ public boolean seekExact(BytesRef text, boolean useCache) {
+ final Term t = new Term(field, text);
+ int loc = Arrays.binarySearch(terms, t, InstantiatedTerm.termComparator);
+ if (loc < 0) {
+ return false;
+ } else {
+ upto = loc;
+ br.copy(text);
+ return true;
+ }
+ }
+
+ @Override
+ public SeekStatus seekCeil(BytesRef text, boolean useCache) {
final Term t = new Term(field, text);
int loc = Arrays.binarySearch(terms, t, InstantiatedTerm.termComparator);
if (loc < 0) {
@@ -63,19 +76,10 @@
}
@Override
- public SeekStatus seek(long ord) {
+ public void seekExact(long ord) {
+ assert (start + (int) ord) < terms.length;
upto = start + (int) ord;
- if (upto >= terms.length) {
- return SeekStatus.END;
- }
-
- if (terms[upto].field() == field) {
- return SeekStatus.FOUND;
- } else {
- // make sure field was interned
- assert !terms[upto].field().equals(field);
- return SeekStatus.END;
- }
+ assert field.equals(terms[upto].field());
}
@Override
@@ -144,9 +148,9 @@
}
@Override
- public void seek(BytesRef term, TermState state) throws IOException {
+ public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
- seek(((OrdTermState)state).ord); // just use the ord for simplicity
+ seekExact(((OrdTermState)state).ord); // just use the ord for simplicity
}
}
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Fri Jun 24 06:43:32 2011 -0400
@@ -133,9 +133,9 @@
Term t = new Term("c", "danny");
TermsEnum aprioriTermEnum = MultiFields.getTerms(aprioriReader, t.field()).iterator();
- aprioriTermEnum.seek(new BytesRef(t.text()));
+ aprioriTermEnum.seekCeil(new BytesRef(t.text()));
TermsEnum testTermEnum = MultiFields.getTerms(testReader, t.field()).iterator();
- testTermEnum.seek(new BytesRef(t.text()));
+ testTermEnum.seekCeil(new BytesRef(t.text()));
assertEquals(aprioriTermEnum.term(), testTermEnum.term());
DocsEnum aprioriTermDocs = aprioriTermEnum.docs(MultiFields.getDeletedDocs(aprioriReader), null);
Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
--- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Jun 24 06:43:32 2011 -0400
@@ -860,7 +860,18 @@
}
@Override
- public SeekStatus seek(BytesRef text, boolean useCache) {
+ public boolean seekExact(BytesRef text, boolean useCache) {
+ termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
+ if (termUpto >= 0) {
+ br.copy(info.sortedTerms[termUpto].getKey());
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public SeekStatus seekCeil(BytesRef text, boolean useCache) {
termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
if (termUpto < 0) { // not found; choose successor
termUpto = -termUpto -1;
@@ -877,13 +888,9 @@
}
@Override
- public SeekStatus seek(long ord) {
+ public void seekExact(long ord) {
+ assert ord < info.sortedTerms.length;
termUpto = (int) ord;
- if (ord < info.sortedTerms.length) {
- return SeekStatus.FOUND;
- } else {
- return SeekStatus.END;
- }
}
@Override
@@ -939,9 +946,9 @@
}
@Override
- public void seek(BytesRef term, TermState state) throws IOException {
+ public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null;
- this.seek(((OrdTermState)state).ord);
+ this.seekExact(((OrdTermState)state).ord);
}
@Override
Index: lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
--- lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java Fri Jun 24 06:43:32 2011 -0400
@@ -186,7 +186,7 @@
}
TermsEnum termsEnum = terms.iterator();
- if (termsEnum.seek(termText) != TermsEnum.SeekStatus.FOUND) {
+ if (termsEnum.seekCeil(termText) != TermsEnum.SeekStatus.FOUND) {
return 0;
}
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Fri Jun 24 06:43:32 2011 -0400
@@ -73,7 +73,7 @@
Document doc = ir.document(0);
assertEquals("0", doc.get("id"));
TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
- assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("1")));
assertNotSame("1", te.term().utf8ToString());
ir.close();
ir = IndexReader.open(dirs[1], true);
@@ -81,7 +81,7 @@
doc = ir.document(0);
assertEquals("1", doc.get("id"));
te = MultiFields.getTerms(ir, "id").iterator();
- assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("0")));
assertNotSame("0", te.term().utf8ToString());
ir.close();
@@ -91,10 +91,10 @@
assertEquals("2", doc.get("id"));
te = MultiFields.getTerms(ir, "id").iterator();
- assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("1")));
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("1")));
assertNotSame("1", te.term());
- assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef("0")));
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef("0")));
assertNotSame("0", te.term().utf8ToString());
ir.close();
for (Directory d : dirs)
@@ -132,7 +132,7 @@
// make sure the deleted doc is not here
TermsEnum te = MultiFields.getTerms(ir, "id").iterator();
Term t = new Term("id", (NUM_DOCS - 1) + "");
- assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seek(new BytesRef(t.text())));
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, te.seekCeil(new BytesRef(t.text())));
assertNotSame(t.text(), te.term().utf8ToString());
ir.close();
for (Directory d : dirs)
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java Fri Jun 24 06:43:32 2011 -0400
@@ -526,7 +526,7 @@
//System.out.println("trigger " + trigger);
shift = random.nextInt(trigger);
}
- termsEnum.seek(new BytesRef(""));
+ termsEnum.seekCeil(new BytesRef(""));
continue;
}
seenTermCount++;
Index: lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
--- lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java Fri Jun 24 06:43:32 2011 -0400
@@ -154,14 +154,14 @@
Terms terms = fields.terms("f");
assertNotNull(terms);
TermsEnum te = terms.iterator();
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("quick")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("brown")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("fox")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("jumped")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("over")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("lazy")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("dog")));
- assertEquals(SeekStatus.FOUND, te.seek(new BytesRef("the")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("quick")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("brown")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("fox")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("jumped")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("over")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("lazy")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("dog")));
+ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("the")));
DocsEnum de = te.docs(null, null);
assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
assertEquals(2, de.freq());
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java
--- lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Fri Jun 24 06:43:32 2011 -0400
@@ -79,7 +79,7 @@
if (terms != null) {
br.copy(term.bytes());
- if (termsEnum.seek(br) == TermsEnum.SeekStatus.FOUND) {
+ if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
docs = termsEnum.docs(delDocs, docs);
while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
result.set(docs.docID());
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java Fri Jun 24 06:43:32 2011 -0400
@@ -63,7 +63,7 @@
TermsEnum termsEnum = terms.iterator();
boolean skip = false;
- TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getPrefix()));
+ TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
} else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java Fri Jun 24 06:43:32 2011 -0400
@@ -53,7 +53,7 @@
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
- TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(getTermText()));
+ TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLuceneTerm(fieldName));
}
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java Fri Jun 24 06:43:32 2011 -0400
@@ -95,7 +95,7 @@
try {
TermsEnum termsEnum = terms.iterator();
- TermsEnum.SeekStatus status = termsEnum.seek(prefixRef);
+ TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
BytesRef text;
if (status == TermsEnum.SeekStatus.FOUND) {
text = prefixRef;
Index: lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
--- lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Fri Jun 24 06:43:32 2011 -0400
@@ -398,7 +398,7 @@
// System.out.println(" term=" + term);
- if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) {
+ if (termsEnum.seekExact(term.bytes(), false)) {
DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs);
if (docsEnum != null) {
Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java
--- lucene/src/java/org/apache/lucene/index/CheckIndex.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/CheckIndex.java Fri Jun 24 06:43:32 2011 -0400
@@ -847,7 +847,7 @@
// Test seek to last term:
if (lastTerm != null) {
- if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) {
+ if (terms.seekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to last term " + lastTerm + " failed");
}
@@ -874,14 +874,14 @@
// Seek by ord
for(int i=seekCount-1;i>=0;i--) {
long ord = i*(termCount/seekCount);
- terms.seek(ord);
+ terms.seekExact(ord);
seekTerms[i] = new BytesRef(terms.term());
}
// Seek by term
long totDocCount = 0;
for(int i=seekCount-1;i>=0;i--) {
- if (terms.seek(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
+ if (terms.seekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}
Index: lucene/src/java/org/apache/lucene/index/DocTermOrds.java
--- lucene/src/java/org/apache/lucene/index/DocTermOrds.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/DocTermOrds.java Fri Jun 24 06:43:32 2011 -0400
@@ -237,7 +237,7 @@
final TermsEnum te = terms.iterator();
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
//System.out.println("seekStart=" + seekStart.utf8ToString());
- if (te.seek(seekStart) == TermsEnum.SeekStatus.END) {
+ if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
// No terms match
return;
}
@@ -693,7 +693,7 @@
}
@Override
- public SeekStatus seek(BytesRef target, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
// already here
if (term != null && term.equals(target)) {
@@ -704,7 +704,7 @@
if (startIdx >= 0) {
// we hit the term exactly... lucky us!
- TermsEnum.SeekStatus seekStatus = termsEnum.seek(target);
+ TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
@@ -717,7 +717,7 @@
if (startIdx == 0) {
// our target occurs *before* the first term
- TermsEnum.SeekStatus seekStatus = termsEnum.seek(target);
+ TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
ord = 0;
setTerm();
@@ -733,7 +733,7 @@
// so we don't need to seek.
} else {
// seek to the right block
- TermsEnum.SeekStatus seekStatus = termsEnum.seek(indexedTermsArray[startIdx]);
+ TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
@@ -754,16 +754,16 @@
}
@Override
- public SeekStatus seek(long targetOrd) throws IOException {
+ public void seekExact(long targetOrd) throws IOException {
int delta = (int) (targetOrd - ordBase - ord);
- //System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord);
+ //System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
if (delta < 0 || delta > indexInterval) {
final int idx = (int) (targetOrd >>> indexIntervalBits);
final BytesRef base = indexedTermsArray[idx];
//System.out.println(" do seek term=" + base.utf8ToString());
ord = idx << indexIntervalBits;
delta = (int) (targetOrd - ord);
- final TermsEnum.SeekStatus seekStatus = termsEnum.seek(base, true);
+ final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base, true);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
} else {
//System.out.println("seek w/in block");
@@ -772,15 +772,14 @@
while (--delta >= 0) {
BytesRef br = termsEnum.next();
if (br == null) {
- term = null;
- return null;
+ assert false;
+ return;
}
ord++;
}
setTerm();
- return term == null ? SeekStatus.END : SeekStatus.FOUND;
- //System.out.println(" return term=" + term.utf8ToString());
+ assert term != null;
}
private BytesRef setTerm() throws IOException {
@@ -794,8 +793,7 @@
}
public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
- TermsEnum.SeekStatus status = termsEnum.seek(ord);
- assert status == TermsEnum.SeekStatus.FOUND;
+ termsEnum.seekExact(ord);
return termsEnum.term();
}
}
Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
--- lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Fri Jun 24 06:43:32 2011 -0400
@@ -99,7 +99,7 @@
final long ram = flushBytes + activeBytes;
final long ramBufferBytes = (long) (maxConfiguredRamBuffer * 1024 * 1024);
// take peakDelta into account - worst case is that all flushing, pending and blocked DWPT had maxMem and the last doc had the peakDelta
- final long expected = (long)(2 * (ramBufferBytes)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
+ final long expected = (2 * (ramBufferBytes)) + ((numPending + numFlushingDWPT() + numBlockedFlushes()) * peakDelta);
if (peakDelta < (ramBufferBytes >> 1)) {
/*
* if we are indexing with very low maxRamBuffer like 0.1MB memory can
Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
--- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Fri Jun 24 06:43:32 2011 -0400
@@ -132,13 +132,18 @@
public FilterTermsEnum(TermsEnum in) { this.in = in; }
@Override
- public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
- return in.seek(text, useCache);
+ public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+ return in.seekExact(text, useCache);
}
@Override
- public SeekStatus seek(long ord) throws IOException {
- return in.seek(ord);
+ public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
+ return in.seekCeil(text, useCache);
+ }
+
+ @Override
+ public void seekExact(long ord) throws IOException {
+ in.seekExact(ord);
}
@Override
@@ -182,8 +187,8 @@
}
@Override
- public void seek(BytesRef term, TermState state) throws IOException {
- in.seek(term, state);
+ public void seekExact(BytesRef term, TermState state) throws IOException {
+ in.seekExact(term, state);
}
@Override
Index: lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
--- lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Fri Jun 24 06:43:32 2011 -0400
@@ -139,7 +139,7 @@
}
@Override
- public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+ public boolean seekExact(BytesRef term, boolean useCache) throws IOException {
queue.clear();
numTop = 0;
@@ -147,6 +147,56 @@
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
seekOpt = true;
}
+
+ lastSeek = null;
+
+ for(int i=0;i Term enumerations are always ordered by
* {@link #getComparator}. Each term in the enumeration is
- * greater than all that precede it.null if the term does not exist.
*
* @see TermsEnum#termState()
- * @see TermsEnum#seek(BytesRef, TermState) */
+ * @see TermsEnum#seekExact(BytesRef, TermState) */
public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
- termsEnum.seek(term, termState);
+ termsEnum.seekExact(term, termState);
return termsEnum.docs(skipDocs, reuse);
}
@@ -110,10 +110,10 @@
* not indexed.
*
* @see TermsEnum#termState()
- * @see TermsEnum#seek(BytesRef, TermState) */
+ * @see TermsEnum#seekExact(BytesRef, TermState) */
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
- termsEnum.seek(term, termState);
+ termsEnum.seekExact(term, termState);
return termsEnum.docsAndPositions(skipDocs, reuse);
}
Index: lucene/src/java/org/apache/lucene/index/TermsEnum.java
--- lucene/src/java/org/apache/lucene/index/TermsEnum.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/TermsEnum.java Fri Jun 24 06:43:32 2011 -0400
@@ -24,18 +24,20 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-/** Iterator to seek ({@link #seek}) or step through ({@link
- * #next} terms, obtain frequency information ({@link
- * #docFreq}), and obtain a {@link DocsEnum} or {@link
+/** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
+ * #seekExact(BytesRef,boolean)}) or step through ({@link
+ * #next}) terms to obtain frequency information ({@link
+ * #docFreq}), {@link DocsEnum} or {@link
* DocsAndPositionsEnum} for the current term ({@link
* #docs}.
*
*
- * On obtaining a TermsEnum, you must first call
- * {@link #next} or {@link #seek}.
+ * The TermsEnum is unpositioned when you first obtain it
+ * and you must first successfully call {@link #next} or one
+ * of the seek methods.
*
* @lucene.experimental */
public abstract class TermsEnum {
@@ -48,31 +50,41 @@
return atts;
}
- /** Represents returned result from {@link #seek}.
+ /** Represents returned result from {@link #seekCeil}.
* If status is FOUND, then the precise term was found.
* If status is NOT_FOUND, then a different term was
* found. If the status is END, the end of the iteration
* was hit. */
public static enum SeekStatus {END, FOUND, NOT_FOUND};
- /** Expert: just like {@link #seek(BytesRef)} but allows
+ /** Attempts to seek to the exact term, returning
+ * true if the term is found. If this returns false, the
+ * enum is unpositioned. For some codecs, seekExact may
+ * be substantially faster than {@link #seekCeil}. */
+ public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
+ return seekCeil(text, useCache) == SeekStatus.FOUND;
+ }
+
+ /** Expert: just like {@link #seekCeil(BytesRef)} but allows
* you to control whether the implementation should
* attempt to use its term cache (if it uses one). */
- public abstract SeekStatus seek(BytesRef text, boolean useCache) throws IOException;
+ public abstract SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException;
- /** Seeks to the specified term. Returns SeekStatus to
+ /** Seeks to the specified term, if it exists, or to the
+ * next (ceiling) term. Returns SeekStatus to
* indicate whether exact term was found, a different
* term was found, or EOF was hit. The target term may
- * be before or after the current term. */
- public final SeekStatus seek(BytesRef text) throws IOException {
- return seek(text, true);
+ * be before or after the current term. If this returns
+ * SeekStatus.END, the enum is unpositioned. */
+ public final SeekStatus seekCeil(BytesRef text) throws IOException {
+ return seekCeil(text, true);
}
/** Seeks to the specified term by ordinal (position) as
* previously returned by {@link #ord}. The target ord
- * may be before or after the current ord. See {@link
- * #seek(BytesRef)}. */
- public abstract SeekStatus seek(long ord) throws IOException;
+ * may be before or after the current ord, and must be
+ * within bounds. */
+ public abstract void seekExact(long ord) throws IOException;
/**
* Expert: Seeks a specific position by {@link TermState} previously obtained
@@ -82,8 +94,7 @@
*
 * Seeking by {@link TermState} should only be used iff the enum the state was
 * obtained from and the enum the state is used for seeking are obtained from
- * the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can
- * leave the enum in undefined state.
+ * the same {@link IndexReader}.
 *
* NOTE: Using this method with an incompatible {@link TermState} might leave
* this {@link TermsEnum} in undefined state. On a segment level
@@ -97,32 +108,30 @@
* @param term the term the TermState corresponds to
* @param state the {@link TermState}
* */
- public void seek(BytesRef term, TermState state) throws IOException {
- seek(term);
+ public void seekExact(BytesRef term, TermState state) throws IOException {
+ if (!seekExact(term, true)) {
+ throw new IllegalArgumentException("term=" + term + " does not exist");
+ }
}
- /** Increments the enumeration to the next element.
+ /** Increments the enumeration to the next term.
* Returns the resulting term, or null if the end was
- * hit. The returned BytesRef may be re-used across calls
- * to next. */
+ * hit (which means the enum is unpositioned). The
+ * returned BytesRef may be re-used across calls to next. */
public abstract BytesRef next() throws IOException;
- /** Returns current term. Do not call this before calling
- * next() for the first time, after next() returns null
- * or after seek returns {@link SeekStatus#END}.*/
+ /** Returns current term. Do not call this when the enum
+ * is unpositioned. */
public abstract BytesRef term() throws IOException;
/** Returns ordinal position for current term. This is an
* optional method (the codec may throw {@link
* UnsupportedOperationException}). Do not call this
- * before calling {@link #next} for the first time or after
- * {@link #next} returns null or {@link #seek} returns
- * END; */
+ * when the enum is unpositioned. */
public abstract long ord() throws IOException;
/** Returns the number of documents containing the current
- * term. Do not call this before calling next() for the
- * first time, after next() returns null or seek returns
+ * term. Do not call this when the enum is unpositioned, e.g. after seekCeil returns
* {@link SeekStatus#END}.*/
public abstract int docFreq() throws IOException;
@@ -135,9 +144,8 @@
public abstract long totalTermFreq() throws IOException;
/** Get {@link DocsEnum} for the current term. Do not
- * call this before calling {@link #next} or {@link
- * #seek} for the first time. This method will not
- * return null.
+ * call this when the enum is unpositioned. This method
+ * will not return null.
*
* @param skipDocs set bits are documents that should not
* be returned
@@ -145,10 +153,9 @@
public abstract DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException;
/** Get {@link DocsAndPositionsEnum} for the current term.
- * Do not call this before calling {@link #next} or
- * {@link #seek} for the first time. This method will
- * only return null if positions were not indexed into
- * the postings by this codec. */
+ * Do not call this when the enum is unpositioned.
+ * This method will only return null if positions were
+ * not indexed into the postings by this codec. */
public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
/**
@@ -160,7 +167,7 @@
* {@link AttributeSource} states separately
*
* @see TermState
- * @see #seek(BytesRef, TermState)
+ * @see #seekExact(BytesRef, TermState)
*/
public TermState termState() throws IOException {
return new TermState() {
@@ -186,10 +193,10 @@
*/
public static final TermsEnum EMPTY = new TermsEnum() {
@Override
- public SeekStatus seek(BytesRef term, boolean useCache) { return SeekStatus.END; }
+ public SeekStatus seekCeil(BytesRef term, boolean useCache) { return SeekStatus.END; }
@Override
- public SeekStatus seek(long ord) { return SeekStatus.END; }
+ public void seekExact(long ord) {}
@Override
public BytesRef term() {
@@ -242,7 +249,7 @@
}
@Override
- public void seek(BytesRef term, TermState state) throws IOException {
+ public void seekExact(BytesRef term, TermState state) throws IOException {
throw new IllegalStateException("this method should never be called");
}
};
Index: lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
--- lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Fri Jun 24 06:43:32 2011 -0400
@@ -302,7 +302,7 @@
only valid if didIndexNext is true: */
private BytesRef nextIndexTerm;
- /* True after seek(TermState), do defer seeking. If the app then
+ /* True after seekExact(TermState), to defer seeking. If the app then
calls next() (which is not "typical"), then we'll do the real seek */
private boolean seekPending;
@@ -348,7 +348,7 @@
// return NOT_FOUND so it's a waste for us to fill in
// the term that was actually NOT_FOUND
@Override
- public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException {
+ public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
if (indexEnum == null) {
throw new IllegalStateException("terms index was not loaded");
@@ -376,7 +376,7 @@
if (cachedState != null) {
seekPending = true;
//System.out.println(" cached!");
- seek(target, cachedState);
+ seekExact(target, cachedState);
//System.out.println(" term=" + term.utf8ToString());
return SeekStatus.FOUND;
}
@@ -711,7 +711,7 @@
}
@Override
- public void seek(BytesRef target, TermState otherState) throws IOException {
+ public void seekExact(BytesRef target, TermState otherState) throws IOException {
//System.out.println("BTR.seek termState target=" + target.utf8ToString() + " " + target + " this=" + this);
assert otherState != null && otherState instanceof BlockTermState;
assert !doOrd || ((BlockTermState) otherState).ord < numTerms;
@@ -731,16 +731,13 @@
}
@Override
- public SeekStatus seek(long ord) throws IOException {
+ public void seekExact(long ord) throws IOException {
//System.out.println("BTR.seek by ord ord=" + ord);
if (indexEnum == null) {
throw new IllegalStateException("terms index was not loaded");
}
- if (ord >= numTerms) {
- state.ord = numTerms-1;
- return SeekStatus.END;
- }
+ assert ord < numTerms;
// TODO: if ord is in same terms block and
// after current ord, we should avoid this seek just
@@ -768,9 +765,6 @@
left--;
assert indexIsCurrent;
}
-
- // always found
- return SeekStatus.FOUND;
}
@Override
Index: lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java
--- lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/codecs/memory/MemoryCodec.java Fri Jun 24 06:43:32 2011 -0400
@@ -561,7 +561,15 @@
}
@Override
- public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ if (VERBOSE) System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
+ current = fstEnum.seekExact(text);
+ didDecode = false;
+ return current != null;
+ }
+
+ @Override
+ public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
if (VERBOSE) System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekCeil(text);
if (current == null) {
@@ -656,7 +664,7 @@
}
@Override
- public SeekStatus seek(long ord) {
+ public void seekExact(long ord) {
// NOTE: we could add this...
throw new UnsupportedOperationException();
}
Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
--- lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Fri Jun 24 06:43:32 2011 -0400
@@ -745,7 +745,7 @@
}
@Override
- public SeekStatus seek(long ord) throws IOException {
+ public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
@@ -755,7 +755,7 @@
}
@Override
- public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+ public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
if (DEBUG_SURROGATES) {
System.out.println("TE.seek target=" + UnicodeUtil.toHexString(term.utf8ToString()));
}
Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
--- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Tue Jun 21 04:53:16 2011 -0400
+++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Fri Jun 24 06:43:32 2011 -0400
@@ -131,7 +131,23 @@
}
@Override
- public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
+ public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
+
+ final BytesRefFSTEnum.InputOutput