Index: lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java	(revision 1052924)
+++ lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java	(working copy)
@@ -148,7 +148,10 @@
    */
   private void assertSame(String regexp) throws IOException {
     RegexpQuery smart = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
+    // nocommit: for testing
+    smart.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FIELDCACHE_REWRITE);
     DumbRegexpQuery dumb = new DumbRegexpQuery(new Term("field", regexp), RegExp.NONE);
+    dumb.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
 
     // we can't compare the two if automaton rewrites to a simpler enum.
     // for example: "a\uda07\udcc7?.*?" gets rewritten to a simpler query:
Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java	(revision 1052924)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java	(working copy)
@@ -95,7 +95,22 @@
       return CONSTANT_SCORE_FILTER_REWRITE;
     }
   };
+
+  /** Rewrites to a {@link ConstantScoreQuery} wrapping a {@link MultiTermQueryFieldCacheWrapperFilter} built from the query; the query's boost is copied onto the result. */
+  public static final RewriteMethod CONSTANT_SCORE_FIELDCACHE_REWRITE = new RewriteMethod() {
+    @Override
+    public Query rewrite(IndexReader reader, MultiTermQuery query) {
+      Query result = new ConstantScoreQuery(new MultiTermQueryFieldCacheWrapperFilter<MultiTermQuery>(query));
+      result.setBoost(query.getBoost());
+      return result;
+    }
 
+    // Make sure we are still a singleton even after deserializing
+    protected Object readResolve() {
+      return CONSTANT_SCORE_FIELDCACHE_REWRITE;
+    }
+  };
+
   /** A rewrite method that first translates each term into
    * {@link BooleanClause.Occur#SHOULD} clause in a
    * BooleanQuery, and keeps the scores as computed by the
Index: lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java	(revision 1052924)
+++ lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java	(working copy)
@@ -240,8 +240,29 @@
 
       @Override
       public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
-        // TODO - we can support with binary search
-        throw new UnsupportedOperationException();
+        // nocommit: can we do this more efficiently than binsearch with seek(ord) ?
+        int low = 1; // search covers ords [1, numOrd-1]; ord 0 is presumably the "no term" slot -- TODO confirm
+        int high = numOrd-1;
+
+        while (low <= high) {
+          int mid = (low + high) >>> 1;
+          seek(mid);
+          int cmp = term.compareTo(text);
+
+          if (cmp < 0)
+            low = mid + 1;
+          else if (cmp > 0)
+            high = mid - 1;
+          else
+            return SeekStatus.FOUND; // key found
+        }
+
+        if (low == numOrd) {
+          return SeekStatus.END;
+        } else {
+          seek(low);
+          return SeekStatus.NOT_FOUND;
+        }
       }
 
       @Override
@@ -315,7 +336,7 @@
 
       @Override
       public Comparator<BytesRef> getComparator() throws IOException {
-        throw new UnsupportedOperationException();
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
       }
     }
   }
Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryFieldCacheWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQueryFieldCacheWrapperFilter.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQueryFieldCacheWrapperFilter.java	(revision 0)
@@ -0,0 +1,136 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+/**
+ * A {@link Filter} that runs a {@link MultiTermQuery} over the field's {@link FieldCache.DocTermsIndex}: ords of matching terms are collected into a bitset and a document is accepted when its ord is set.
+ */
+public class MultiTermQueryFieldCacheWrapperFilter<Q extends MultiTermQuery> extends Filter {
+
+  protected final Q query;
+
+  /**
+   * Wrap a {@link MultiTermQuery} as a Filter.
+   */
+  protected MultiTermQueryFieldCacheWrapperFilter(Q query) {
+    this.query = query;
+  }
+
+  @Override
+  public String toString() {
+    // query.toString should be ok for the filter, too, if the query boost is 1.0f
+    return query.toString();
+  }
+
+  @Override
+  public final boolean equals(final Object o) {
+    if (o==this) return true;
+    if (o==null) return false;
+    if (this.getClass().equals(o.getClass())) {
+      return this.query.equals( ((MultiTermQueryFieldCacheWrapperFilter<?>)o).query );
+    }
+    return false;
+  }
+
+  @Override
+  public final int hashCode() {
+    return query.hashCode();
+  }
+
+  /** Returns the field name for this query */
+  public final String getField() { return query.getField(); }
+
+  /**
+   * Expert: Return the number of unique terms visited during execution of the filter.
+   * If there are many of them, you may consider using another filter type
+   * or optimize your total term count in index.
+   * <p>This method is not thread safe, be sure to only call it when no filter is running!
+   * If you re-use the same filter instance for another
+   * search, be sure to first reset the term counter
+   * with {@link #clearTotalNumberOfTerms}.
+   * @see #clearTotalNumberOfTerms
+   */
+  public int getTotalNumberOfTerms() {
+    return query.getTotalNumberOfTerms();
+  }
+
+  /**
+   * Expert: Resets the counting of unique terms.
+   * Do this before executing the filter.
+   * @see #getTotalNumberOfTerms
+   */
+  public void clearTotalNumberOfTerms() {
+    query.clearTotalNumberOfTerms();
+  }
+
+  /**
+   * Returns a DocIdSet with documents that should be permitted in search
+   * results.
+   */
+  @Override
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, query.field);
+    final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd());
+    TermsEnum termsEnum = query.getTermsEnum(new Terms() {
+
+      @Override
+      public Comparator<BytesRef> getComparator() throws IOException {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
+      @Override
+      public TermsEnum iterator() throws IOException {
+        return fcsi.getTermsEnum();
+      }
+
+    });
+
+    assert termsEnum != null;
+    if (termsEnum.next() != null) {
+      // fill into a OpenBitSet
+      int termCount = 0;
+      do {
+        long ord = termsEnum.ord();
+        if (ord > 0) {
+          termSet.fastSet(ord);
+          termCount++;
+        }
+      } while (termsEnum.next() != null);
+
+      query.incTotalNumberOfTerms(termCount);
+    } else {
+      return DocIdSet.EMPTY_DOCIDSET;
+    }
+
+    return new FieldCacheRangeFilter.FieldCacheDocIdSet(reader, true) {
+      @Override
+      boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+        return termSet.fastGet(fcsi.getOrd(doc));
+      }
+    };
+  }
+}