new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery()) instead.
+ * This query will be removed in Lucene 4.0
*/
-public class SpanRegexQuery extends SpanQuery implements RegexQueryCapable {
- private RegexCapabilities regexImpl = new JavaUtilRegexCapabilities();
- private Term term;
+@Deprecated
+public class SpanRegexQuery extends SpanMultiTermQueryWrapperextends RewriteMethod { + + /** Return a suitable top-level Query for holding all expanded terms. */ + protected abstract Q getTopLevelQuery() throws IOException; + + /** Add a MultiTermQuery term to the top-level query */ + protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException; + protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { final ListsubReaders = new ArrayList (); ReaderUtil.gatherSubReaders(subReaders, reader); @@ -298,14 +304,15 @@ } } - private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite { + private static abstract class ScoringRewrite extends TermCollectingRewrite{ + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final ParallelArraysTermCollector col = new ParallelArraysTermCollector(); collectTerms(reader, query, col); final Term placeholderTerm = new Term(query.field); - final BooleanQuery result = new BooleanQuery(true); + final Q result = getTopLevelQuery(); final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); @@ -315,20 +322,13 @@ final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); assert reader.docFreq(term) == docFreq[pos]; - final TermQuery tq = new TermQuery(term, docFreq[pos]); - tq.setBoost(query.getBoost() * boost[pos]); - result.add(tq, BooleanClause.Occur.SHOULD); + addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); } } query.incTotalNumberOfTerms(size); return result; } - // Make sure we are still a singleton even after deserializing - protected Object readResolve() { - return SCORING_BOOLEAN_QUERY_REWRITE; - } - static final class ParallelArraysTermCollector extends TermCollector { final TermFreqBoostByteStart array = new 
TermFreqBoostByteStart(16); final BytesRefHash terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); @@ -412,15 +412,87 @@ * exceeds {@link BooleanQuery#getMaxClauseCount}. * * @see #setRewriteMethod */ - public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite(); + public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringRewrite() { + @Override + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); + } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { + final TermQuery tq = new TermQuery(term, docCount); + tq.setBoost(boost); + topLevel.add(tq, BooleanClause.Occur.SHOULD); + } + + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return SCORING_BOOLEAN_QUERY_REWRITE; + } + }; + + /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except + * scores are not computed. Instead, each matching + * document receives a constant score equal to the + * query's boost. + * + * NOTE: This rewrite method will hit {@link + * BooleanQuery.TooManyClauses} if the number of terms + * exceeds {@link BooleanQuery#getMaxClauseCount}. + * + * @see #setRewriteMethod */ + public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new RewriteMethod() { + @Override + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + Query result = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query); + assert result instanceof BooleanQuery; + // TODO: if empty boolean query return NullQuery? 
+ if (!((BooleanQuery) result).clauses().isEmpty()) { + // strip the scores off + result = new ConstantScoreQuery(new QueryWrapperFilter(result)); + result.setBoost(query.getBoost()); + } + return result; + } + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + } + }; /** + * A rewrite method that first translates each term into a SpanTermQuery in a + * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the + * scores as computed by the query. + * + * @see #setRewriteMethod + */ + public final static RewriteMethod SCORING_SPAN_QUERY_REWRITE = new ScoringRewrite
() { + @Override + protected SpanOrQuery getTopLevelQuery() { + return new SpanOrQuery(); + } + + @Override + protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) { + final SpanTermQuery q = new SpanTermQuery(term); + q.setBoost(boost); + topLevel.addClause(q); + } + + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return SCORING_SPAN_QUERY_REWRITE; + } + }; + + /** * Base rewrite method for collecting only the top terms * via a priority queue. */ - public static abstract class TopTermsBooleanQueryRewrite extends BooleanQueryRewrite { - private final int size; + public static abstract class TopTermsRewrite extends ScoringRewrite{ + private final int maxSize; /** * Create a TopTermsBooleanQueryRewrite for @@ -429,16 +501,17 @@ * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than *size, then it will be used instead. */ - public TopTermsBooleanQueryRewrite(int size) { - this.size = size; + public TopTermsRewrite(int size) { + this.maxSize = size; } - /** Return a suitable Query for a MultiTermQuery term. 
*/ - protected abstract Query getQuery(Term term, int docCount); - + /** return the maximum priority queue size */ + public int getSize() { + return maxSize; + } + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { - final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount()); final PriorityQueuestQueue = new PriorityQueue (); collectTerms(reader, query, new TermCollector() { private final MaxNonCompetitiveBoostAttribute maxBoostAtt = @@ -504,23 +577,21 @@ }); final Term placeholderTerm = new Term(query.field); - final BooleanQuery bq = new BooleanQuery(true); + final Q q = getTopLevelQuery(); final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]); ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { final Term term = placeholderTerm.createTerm(st.bytes); assert reader.docFreq(term) == st.docFreq; - Query tq = getQuery(term, st.docFreq); - tq.setBoost(query.getBoost() * st.boost); // set the boost - bq.add(tq, BooleanClause.Occur.SHOULD); // add to query + addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); - return bq; + return q; } @Override public int hashCode() { - return 31 * size; + return 31 * maxSize; } @Override @@ -528,8 +599,8 @@ if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - TopTermsBooleanQueryRewrite other = (TopTermsBooleanQueryRewrite) obj; - if (size != other.size) return false; + final TopTermsRewrite other = (TopTermsRewrite) obj; + if (maxSize != other.maxSize) return false; return true; } @@ -574,8 +645,7 @@ * * @see #setRewriteMethod */ - public static final class TopTermsScoringBooleanQueryRewrite extends - TopTermsBooleanQueryRewrite { + public static final class TopTermsScoringBooleanQueryRewrite extends TopTermsRewrite { /** * Create a TopTermsScoringBooleanQueryRewrite for 
@@ -585,13 +655,20 @@ * size, then it will be used instead. */ public TopTermsScoringBooleanQueryRewrite(int size) { - super(size); + super(Math.min(size, BooleanQuery.getMaxClauseCount())); } @Override - protected Query getQuery(Term term, int docFreq) { - return new TermQuery(term, docFreq); + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { + final TermQuery tq = new TermQuery(term, docCount); + tq.setBoost(boost); + topLevel.add(tq, BooleanClause.Occur.SHOULD); + } } /** @@ -604,8 +681,7 @@ * * @see #setRewriteMethod */ - public static final class TopTermsBoostOnlyBooleanQueryRewrite extends - TopTermsBooleanQueryRewrite { + public static final class TopTermsBoostOnlyBooleanQueryRewrite extends TopTermsRewrite{ /** * Create a TopTermsBoostOnlyBooleanQueryRewrite for @@ -615,49 +691,56 @@ * size, then it will be used instead. */ public TopTermsBoostOnlyBooleanQueryRewrite(int size) { - super(size); + super(Math.min(size, BooleanQuery.getMaxClauseCount())); } @Override - protected Query getQuery(Term term, int docFreq) { - return new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term, docFreq))); + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) { + final Query q = new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term, docFreq))); + q.setBoost(boost); + topLevel.add(q, BooleanClause.Occur.SHOULD); + } } - private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { + /** + * A rewrite method that first translates each term into a SpanTermQuery in a + * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the + * scores as computed by the query. 
+ * + * <p> + * This rewrite method only uses the top scoring terms so it will not overflow + * the boolean max clause count. + * + * @see #setRewriteMethod + */ + public static final class TopTermsSpanBooleanQueryRewrite extends TopTermsRewrite<SpanOrQuery>
{ + /** + * Create a TopTermsSpanBooleanQueryRewrite for + * at most sizeterms. + */ + public TopTermsSpanBooleanQueryRewrite(int size) { + super(size); + } + @Override - public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - Query result = super.rewrite(reader, query); - assert result instanceof BooleanQuery; - // TODO: if empty boolean query return NullQuery? - if (!((BooleanQuery) result).clauses().isEmpty()) { - // strip the scores off - result = new ConstantScoreQuery(new QueryWrapperFilter(result)); - result.setBoost(query.getBoost()); - } - return result; + protected SpanOrQuery getTopLevelQuery() { + return new SpanOrQuery(); } - // Make sure we are still a singleton even after deserializing @Override - protected Object readResolve() { - return CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) { + final SpanTermQuery q = new SpanTermQuery(term); + q.setBoost(boost); + topLevel.addClause(q); } } + - /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except - * scores are not computed. Instead, each matching - * document receives a constant score equal to the - * query's boost. - * - *NOTE: This rewrite method will hit {@link - * BooleanQuery.TooManyClauses} if the number of terms - * exceeds {@link BooleanQuery#getMaxClauseCount}. - * - * @see #setRewriteMethod */ - public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite(); - - /** A rewrite method that tries to pick the best * constant-score rewrite method based on term and * document counts from the query. If both the number of @@ -666,7 +749,7 @@ * Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is * used. */ - public static class ConstantScoreAutoRewrite extends BooleanQueryRewrite { + public static class ConstantScoreAutoRewrite extends TermCollectingRewrite
{ // Defaults derived from rough tests with a 20.0 million // doc Wikipedia index. With more than 350 terms in the @@ -707,6 +790,16 @@ } @Override + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); + } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) { + topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD); + } + + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { // Get the enum and start visiting terms. If we @@ -722,18 +815,16 @@ if (col.hasCutOff) { return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); } else if (size == 0) { - return new BooleanQuery(true); + return getTopLevelQuery(); } else { - final BooleanQuery bq = new BooleanQuery(true); + final BooleanQuery bq = getTopLevelQuery(); final Term placeholderTerm = new Term(query.field); final BytesRefHash pendingTerms = col.pendingTerms; final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); for(int i = 0; i < size; i++) { // docFreq is not used for constant score here, we pass 1 // to explicitely set a fake value, so it's not calculated - bq.add(new TermQuery( - placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1 - ), BooleanClause.Occur.SHOULD); + addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f); } // Strip scores final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0) @@ -0,0 +1,103 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; + +/** + * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, + * so it can be nested within other SpanQuery classes. + * + * The query is rewritten by default to a {@link SpanOrQuery} containing + * the expanded terms, but this can be customized. + *
+ * Example: + * <pre> + * {@code + * WildcardQuery wildcard = new WildcardQuery(new Term("field", "bro?n")); + * SpanQuery spanWildcard = new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard); + * // do something with spanWildcard, such as use it in a SpanFirstQuery + * } + * </pre>
+ */ +public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQuery { + protected final Q query; + + /** + * Create a new SpanMultiTermQueryWrapper. + * + * @param query Query to wrap. + * <p> + * NOTE: This will call {@link MultiTermQuery#setRewriteMethod(MultiTermQuery.RewriteMethod)} + * on the wrapped
query, changing its rewrite method to a suitable one for spans. + */ + public SpanMultiTermQueryWrapper(Q query) { + this.query = query; + + MultiTermQuery.RewriteMethod method = query.getRewriteMethod(); + if (method instanceof MultiTermQuery.TopTermsRewrite) { + int pqsize = ((MultiTermQuery.TopTermsRewrite) method).getSize(); + setRewriteMethod(new MultiTermQuery.TopTermsSpanBooleanQueryRewrite(pqsize)); + } else { + setRewriteMethod(MultiTermQuery.SCORING_SPAN_QUERY_REWRITE); + } + } + + /** + * Expert: returns the rewriteMethod + */ + public final MultiTermQuery.RewriteMethod getRewriteMethod() { + return query.getRewriteMethod(); + } + + /** + * Expert: sets the rewrite method. This only makes sense + * to be a span rewrite method. + */ + public final void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) { + query.setRewriteMethod(rewriteMethod); + } + + @Override + public Spans getSpans(IndexReader reader) throws IOException { + throw new UnsupportedOperationException("Query should have been rewritten"); + } + + @Override + public String getField() { + return query.getField(); + } + + @Override + public String toString(String field) { + StringBuilder builder = new StringBuilder(); + builder.append("SpanMultiTermQueryWrapper("); + builder.append(query.toString(field)); + builder.append(")"); + return builder.toString(); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + return query.rewrite(reader); + } +} Property changes on: lucene\src\java\org\apache\lucene\search\spans\SpanMultiTermQueryWrapper.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (revision 1034499) +++ 
lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (working copy) @@ -42,16 +42,20 @@ // copy clauses array into an ArrayList this.clauses = new ArrayList(clauses.length); for (int i = 0; i < clauses.length; i++) { - SpanQuery clause = clauses[i]; - if (i == 0) { // check field - field = clause.getField(); - } else if (!clause.getField().equals(field)) { - throw new IllegalArgumentException("Clauses must have same field."); - } - this.clauses.add(clause); + addClause(clauses[i]); } } + /** Adds a clause to this query */ + public final void addClause(SpanQuery clause) { + if (field == null) { + field = clause.getField(); + } else if (!clause.getField().equals(field)) { + throw new IllegalArgumentException("Clauses must have same field."); + } + this.clauses.add(clause); + } + /** Return the clauses whose spans are matched. */ public SpanQuery[] getClauses() { return clauses.toArray(new SpanQuery[clauses.size()]); Index: lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (revision 0) +++ lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (revision 0) @@ -0,0 +1,92 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests for {@link SpanMultiTermQueryWrapper}, wrapping a few MultiTermQueries. 
+ */ +public class TestSpanMultiTermQueryWrapper extends LuceneTestCase { + private Directory directory; + private IndexReader reader; + private Searcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + directory = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, directory); + Document doc = new Document(); + Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(field); + + field.setValue("quick brown fox"); + iw.addDocument(doc); + field.setValue("jumps over lazy broun dog"); + iw.addDocument(doc); + field.setValue("jumps over extremely very lazy broxn dog"); + iw.addDocument(doc); + reader = iw.getReader(); + iw.close(); + searcher = new IndexSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + directory.close(); + super.tearDown(); + } + + public void testWildcard() throws Exception { + WildcardQuery wq = new WildcardQuery(new Term("field", "bro?n")); + SpanQuery swq = new SpanMultiTermQueryWrapper<WildcardQuery>(wq); + // will only match quick brown fox + SpanFirstQuery sfq = new SpanFirstQuery(swq, 2); + assertEquals(1, searcher.search(sfq, 10).totalHits); + } + + public void testFuzzy() throws Exception { + FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan")); + SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq); + // will not match quick brown fox + SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 3, 6); + assertEquals(2, searcher.search(sprq, 10).totalHits); + } + + public void testFuzzy2() throws Exception { + // maximum of 1 term expansion + FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"), 1f, 0, 1); + SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq); + // will only match jumps over lazy broun dog + SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 0, 100); + assertEquals(1, searcher.search(sprq, 10).totalHits); + } +} Property changes on:
lucene\src\test\org\apache\lucene\search\spans\TestSpanMultiTermQueryWrapper.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native