Index: src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- src/test/org/apache/lucene/TestExternalCodecs.java (revision 928195) +++ src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -284,7 +284,7 @@ } @Override - public SeekStatus seek(BytesRef term) { + public SeekStatus seek(BytesRef term, boolean useCache) { current = term.utf8ToString(); it = null; if (ramField.termToDocs.containsKey(current)) { Index: src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 928195) +++ src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -46,6 +46,7 @@ private BytesRef initialSeekTerm = null; private boolean doSeek = true; private BytesRef actualTerm = null; + private boolean useTermsCache = false; private final TermsEnum tenum; @@ -115,6 +116,16 @@ return t; } + /** Expert: enable or disable the terms cache when seeking. */ + protected void setUseTermsCache(boolean useTermsCache) { + this.useTermsCache = useTermsCache; + } + + /** Expert: returns whether the terms cache is used when seeking. */ + protected boolean getUseTermsCache() { + return useTermsCache; + } + /** * Returns the related attributes, the returned {@link AttributeSource} * is shared with the delegate {@code TermsEnum}. 
@@ -148,7 +159,7 @@ * @throws UnsupportedOperationException */ @Override - public SeekStatus seek(BytesRef term) throws IOException { + public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { throw new UnsupportedOperationException(getClass().getName()+" does not support seeking"); } @@ -187,7 +198,7 @@ if (doSeek) { doSeek = false; final BytesRef t = nextSeekTerm(actualTerm); - if (t == null || tenum.seek(t) == SeekStatus.END) { + if (t == null || tenum.seek(t, useTermsCache) == SeekStatus.END) { // no more terms to seek to or enum exhausted return null; } Index: src/java/org/apache/lucene/search/AutomatonTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/AutomatonTermsEnum.java (revision 928195) +++ src/java/org/apache/lucene/search/AutomatonTermsEnum.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.util.BitSet; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -69,8 +68,10 @@ private final boolean finite; // array of sorted transitions for each state, indexed by state number private final Transition[][] allTransitions; - // for path tracking: each bit is a numbered state - private final BitSet visited; + // for path tracking: each long records gen when we last + // visited the state; we use gens to avoid having to clear + private final long[] visited; + private long curGen; // used for unicode conversion from BytesRef byte[] to char[] private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result(); // the reference used for seeking forwards through the term dictionary @@ -126,16 +127,18 @@ // we will seek each time anyway (and take the unicode conversion hit). // its also currently expensive to calculate, because getCommonSuffix is // a bit expensive. 
- commonSuffixRef = new BytesRef(""); + commonSuffixRef = null; // build a cache of sorted transitions for every state allTransitions = new Transition[runAutomaton.getSize()][]; for (State state : this.automaton.getStates()) allTransitions[state.getNumber()] = state.getSortedTransitionArray(false); // used for path tracking, where each bit is a numbered state. - visited = new BitSet(runAutomaton.getSize()); + visited = new long[runAutomaton.getSize()]; NO_MATCH = AcceptStatus.NO_AND_SEEK; YES_MATCH = finite ? AcceptStatus.YES_AND_SEEK : AcceptStatus.YES; } + + setUseTermsCache(finite); } /** @@ -196,7 +199,7 @@ */ @Override protected AcceptStatus accept(final BytesRef term) { - if (term.endsWith(commonSuffixRef)) { + if (commonSuffixRef == null || term.endsWith(commonSuffixRef)) { UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16); return runAutomaton.run(utf16.result, 0, utf16.length) ? YES_MATCH : NO_MATCH; } else { @@ -307,9 +310,10 @@ c++; } + curGen++; + utf16.setLength(position); - visited.clear(); - visited.set(state); + visited[state] = curGen; Transition transitions[] = allTransitions[state]; @@ -327,8 +331,8 @@ * as long as is possible, continue down the minimal path in * lexicographic order. if a loop or accept state is encountered, stop. */ - while (!visited.get(state) && !runAutomaton.isAccept(state)) { - visited.set(state); + while (visited[state] != curGen && !runAutomaton.isAccept(state)) { + visited[state] = curGen; /* * Note: we work with a DFA with no transitions to dead states. 
* so the below is ok, if it is not an accept state, Index: src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 928195) +++ src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -229,8 +229,8 @@ } @Override - public SeekStatus seek(BytesRef text) throws IOException { - return actualEnum.seek(text); + public SeekStatus seek(BytesRef text, boolean useCache) throws IOException { + return actualEnum.seek(text, useCache); } @Override Index: src/java/org/apache/lucene/index/LegacyFieldsEnum.java =================================================================== --- src/java/org/apache/lucene/index/LegacyFieldsEnum.java (revision 928195) +++ src/java/org/apache/lucene/index/LegacyFieldsEnum.java (working copy) @@ -89,7 +89,7 @@ } @Override - public SeekStatus seek(BytesRef text) throws IOException { + public SeekStatus seek(BytesRef text, boolean useCache) throws IOException { if (terms != null) { terms.close(); } Index: src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/TermsEnum.java (revision 928195) +++ src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -54,11 +54,18 @@ * was hit. */ public static enum SeekStatus {END, FOUND, NOT_FOUND}; + /** Expert: just like {@link #seek(BytesRef)} but allows + * you to control whether the implementation should + * attempt to use its term cache (if it uses one). */ + public abstract SeekStatus seek(BytesRef text, boolean useCache) throws IOException; + /** Seeks to the specified term. Returns SeekResult to * indicate whether exact term was found, a different * term was found, or EOF was hit. The target term may * be befor or after the current term. 
*/ - public abstract SeekStatus seek(BytesRef text) throws IOException; + public SeekStatus seek(BytesRef text) throws IOException { + return seek(text, true); + } /** Seeks to the specified term by ordinal (position) as * previously returned by {@link #ord}. The target ord @@ -124,7 +131,7 @@ */ public static final TermsEnum EMPTY = new TermsEnum() { @Override - public SeekStatus seek(BytesRef term) { return SeekStatus.END; } + public SeekStatus seek(BytesRef term, boolean useCache) { return SeekStatus.END; } @Override public SeekStatus seek(long ord) { return SeekStatus.END; } Index: src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 928195) +++ src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -136,11 +136,11 @@ } @Override - public SeekStatus seek(BytesRef term) throws IOException { + public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { queue.clear(); numTop = 0; for(int i=0;i