Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1033939) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -32,6 +32,8 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; @@ -299,13 +301,28 @@ } private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite { + /** Returns the query used for one expanded term; this implementation creates a {@link TermQuery} from the term and docCount. */ + protected Query getQuery(Term term, int docCount) { + return new TermQuery(term, docCount); + } + + /** Returns the top-level query that will hold all expanded terms; this implementation creates a {@link BooleanQuery}. 
*/ + protected Query getTopLevelQuery() { + return new BooleanQuery(true); + } + + /** Adds one expanded-term query to the top-level query; this implementation casts topLevel to {@link BooleanQuery} and adds subQuery as a SHOULD clause. */ + protected void addClause(Query topLevel, Query subQuery) { + ((BooleanQuery)topLevel).add(subQuery, BooleanClause.Occur.SHOULD); + } + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final ParallelArraysTermCollector col = new ParallelArraysTermCollector(); collectTerms(reader, query, col); final Term placeholderTerm = new Term(query.field); - final BooleanQuery result = new BooleanQuery(true); + final Query result = getTopLevelQuery(); final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); @@ -315,9 +332,9 @@ final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); assert reader.docFreq(term) == docFreq[pos]; - final TermQuery tq = new TermQuery(term, docFreq[pos]); + final Query tq = getQuery(term, docFreq[pos]); tq.setBoost(query.getBoost() * boost[pos]); - result.add(tq, BooleanClause.Occur.SHOULD); + addClause(result, tq); } } query.incTotalNumberOfTerms(size); @@ -436,6 +453,16 @@ /** Return a suitable Query for a MultiTermQuery term. */ protected abstract Query getQuery(Term term, int docCount); + /** Returns the top-level query that will hold all expanded terms; this implementation creates a {@link BooleanQuery}. 
*/ + protected Query getTopLevelQuery() { + return new BooleanQuery(true); + } + + /** Adds one expanded-term query to the top-level query; this implementation casts topLevel to {@link BooleanQuery} and adds subQuery as a SHOULD clause. */ + protected void addClause(Query topLevel, Query subQuery) { + ((BooleanQuery)topLevel).add(subQuery, BooleanClause.Occur.SHOULD); + } + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount()); @@ -504,7 +531,7 @@ }); final Term placeholderTerm = new Term(query.field); - final BooleanQuery bq = new BooleanQuery(true); + final Query bq = getTopLevelQuery(); final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]); ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { @@ -512,7 +539,7 @@ assert reader.docFreq(term) == st.docFreq; Query tq = getQuery(term, st.docFreq); tq.setBoost(query.getBoost() * st.boost); // set the boost - bq.add(tq, BooleanClause.Occur.SHOULD); // add to query + addClause(bq, tq); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); return bq; @@ -624,6 +651,72 @@ } } + /** + * A rewrite method that first translates each term into a {@link SpanTermQuery} + * clause of a {@link SpanOrQuery}, and keeps the + * scores as computed by the query. 
+ * Overrides the three expansion hooks so that the expanded terms are collected as {@link SpanTermQuery} clauses of a {@link SpanOrQuery}. + * @see #setRewriteMethod + */ + public static final class ScoringSpanBooleanQueryRewrite extends + ScoringBooleanQueryRewrite { + + @Override + protected Query getTopLevelQuery() { + return new SpanOrQuery(); + } + + @Override + protected void addClause(Query topLevel, Query subQuery) { + ((SpanOrQuery)topLevel).addClause((SpanTermQuery)subQuery); + } + + @Override + protected Query getQuery(Term term, int docCount) { + return new SpanTermQuery(term); + } + } + + /** + * A rewrite method that first translates each term into a {@link SpanTermQuery} + * clause of a {@link SpanOrQuery}, and keeps the + * scores as computed by the query. + * + *
<p>
+ * This rewrite method only uses the top scoring terms so it will not overflow + * the boolean max clause count. + * + * @see #setRewriteMethod + */ + public static final class TopTermsSpanBooleanQueryRewrite extends + TopTermsBooleanQueryRewrite { + /** + * Create a TopTermsSpanBooleanQueryRewrite for + * at most <code>size</code> terms. + *
<p>
+ * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than + * <code>size</code>, then it will be used instead. + */ + public TopTermsSpanBooleanQueryRewrite(int size) { + super(size); + } + + @Override + protected Query getTopLevelQuery() { + return new SpanOrQuery(); + } + + @Override + protected void addClause(Query topLevel, Query subQuery) { + ((SpanOrQuery)topLevel).addClause((SpanTermQuery)subQuery); + } + + @Override + protected Query getQuery(Term term, int docFreq) { + return new SpanTermQuery(term); + } + } + private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { Index: lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (revision 1033939) +++ lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (working copy) @@ -42,16 +42,20 @@ // copy clauses array into an ArrayList this.clauses = new ArrayList(clauses.length); for (int i = 0; i < clauses.length; i++) { - SpanQuery clause = clauses[i]; - if (i == 0) { // check field - field = clause.getField(); - } else if (!clause.getField().equals(field)) { - throw new IllegalArgumentException("Clauses must have same field."); - } - this.clauses.add(clause); + addClause(clauses[i]); } } + /** Adds a clause to this query; the first clause added sets the field, and any later clause whose field differs is rejected with an IllegalArgumentException. */ + public final void addClause(SpanQuery clause) { + if (field == null) { + field = clause.getField(); + } else if (!clause.getField().equals(field)) { + throw new IllegalArgumentException("Clauses must have same field."); + } + this.clauses.add(clause); + } + /** Return the clauses whose spans are matched. 
*/ public SpanQuery[] getClauses() { return clauses.toArray(new SpanQuery[clauses.size()]); Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0) @@ -0,0 +1,86 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.ToStringUtils; + +/** + * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, + * so it can be nested within other SpanQuery classes. + * + * The query is rewritten by default to a {@link SpanOrQuery} containing + * the expanded terms, but this can be customized. 
+ */ +public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQuery { + protected final Q query; + private MultiTermQuery.RewriteMethod rewriteMethod = + new MultiTermQuery.ScoringSpanBooleanQueryRewrite(); + + public SpanMultiTermQueryWrapper(Q query) { + this.query = query; + } + + /** + * Expert: returns the rewrite method that {@link #rewrite} installs on the wrapped query + */ + public MultiTermQuery.RewriteMethod getRewriteMethod() { + return rewriteMethod; + } + + /** + * Expert: sets the rewrite method. This only makes sense + * to be a span rewrite method. + */ + public void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) { + this.rewriteMethod = rewriteMethod; + } + + @Override + public Spans getSpans(IndexReader reader) throws IOException { + throw new UnsupportedOperationException("Query should have been rewritten"); + } + + @Override + public String getField() { + return query.getField(); + } + + @Override + public String toString(String field) { + StringBuilder builder = new StringBuilder(); + builder.append("SpanMultiTermQueryWrapper("); + builder.append(query.toString(field)); + builder.append(")"); + builder.append(ToStringUtils.boost(getBoost())); + return builder.toString(); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + // nocommit: we change the subquery here... 
this is evil (the wrapped query keeps this rewrite method after the call) + query.setRewriteMethod(rewriteMethod); + Query rewritten = query.rewrite(reader); + rewritten.setBoost(getBoost()); + return rewritten; + } +} Property changes on: lucene\src\java\org\apache\lucene\search\spans\SpanMultiTermQueryWrapper.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (revision 1033939) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (working copy) @@ -18,115 +18,24 @@ */ import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.search.spans.Spans; -import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import java.io.IOException; -import java.util.Collection; -import java.util.ArrayList; - /** * A SpanQuery version of {@link RegexQuery} allowing regular expression * queries to be nested within other SpanQuery subclasses. 
*/ -public class SpanRegexQuery extends SpanQuery implements RegexQueryCapable { - private RegexCapabilities regexImpl = new JavaUtilRegexCapabilities(); - private Term term; +public class SpanRegexQuery extends SpanMultiTermQueryWrapper<RegexQuery> implements RegexQueryCapable { + private final RegexCapabilities regexImpl = new JavaUtilRegexCapabilities(); /* NOTE(review): nothing in the patched class reads this field any more; the accessors below delegate to the wrapped query - consider dropping it */ public SpanRegexQuery(Term term) { - this.term = term; + super(new RegexQuery(term)); } - public Term getTerm() { return term; } - - @Override - public Query rewrite(IndexReader reader) throws IOException { - RegexQuery orig = new RegexQuery(term); - orig.setRegexImplementation(regexImpl); - orig.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - BooleanQuery bq = (BooleanQuery) orig.rewrite(reader); - - BooleanClause[] clauses = bq.getClauses(); - SpanQuery[] sqs = new SpanQuery[clauses.length]; - for (int i = 0; i < clauses.length; i++) { - BooleanClause clause = clauses[i]; - - // Clauses from RegexQuery.rewrite are always TermQuery's - TermQuery tq = (TermQuery) clause.getQuery(); - - sqs[i] = new SpanTermQuery(tq.getTerm()); - sqs[i].setBoost(tq.getBoost()); - } - - SpanOrQuery query = new SpanOrQuery(sqs); - query.setBoost(orig.getBoost()); - - return query; - } - - @Override - public Spans getSpans(IndexReader reader) throws IOException { - throw new UnsupportedOperationException("Query should have been rewritten"); - } - - @Override - public String getField() { - return term.field(); - } - - public Collection getTerms() { - Collection terms = new ArrayList(); - terms.add(term); - return terms; - } - - /* generated by IntelliJ IDEA */ - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final SpanRegexQuery that = (SpanRegexQuery) o; - - if (!regexImpl.equals(that.regexImpl)) return false; - if (!term.equals(that.term)) return false; - - return true; - } - - /* generated by IntelliJ IDEA */ - @Override - public 
int hashCode() { - int result; - result = regexImpl.hashCode(); - result = 29 * result + term.hashCode(); - return result; - } - - @Override - public String toString(String field) { - StringBuilder buffer = new StringBuilder(); - buffer.append("spanRegexQuery("); - buffer.append(term); - buffer.append(")"); - buffer.append(ToStringUtils.boost(getBoost())); - return buffer.toString(); - } - public void setRegexImplementation(RegexCapabilities impl) { - this.regexImpl = impl; + query.setRegexImplementation(impl); } public RegexCapabilities getRegexImplementation() { - return regexImpl; + return query.getRegexImplementation(); } }