Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1033939) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -32,6 +32,8 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; @@ -299,13 +301,28 @@ } private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite { + /** Return a suitable Query for a MultiTermQuery term. */ + protected Query getQuery(Term term, int docCount) { + return new TermQuery(term, docCount); + } + + /** Return a suitable top-level Query for holding all expanded terms. 
*/ + protected Query getTopLevelQuery() { + return new BooleanQuery(true); + } + + /** Add a MultiTermQuery term to the top-level query */ + protected void addClause(Query topLevel, Query subQuery) { + ((BooleanQuery)topLevel).add(subQuery, BooleanClause.Occur.SHOULD); + } + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final ParallelArraysTermCollector col = new ParallelArraysTermCollector(); collectTerms(reader, query, col); final Term placeholderTerm = new Term(query.field); - final BooleanQuery result = new BooleanQuery(true); + final Query result = getTopLevelQuery(); final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); @@ -315,9 +332,9 @@ final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); assert reader.docFreq(term) == docFreq[pos]; - final TermQuery tq = new TermQuery(term, docFreq[pos]); + final Query tq = getQuery(term, docFreq[pos]); tq.setBoost(query.getBoost() * boost[pos]); - result.add(tq, BooleanClause.Occur.SHOULD); + addClause(result, tq); } } query.incTotalNumberOfTerms(size); @@ -436,6 +453,16 @@ /** Return a suitable Query for a MultiTermQuery term. */ protected abstract Query getQuery(Term term, int docCount); + /** Return a suitable top-level Query for holding all expanded terms. 
*/ + protected Query getTopLevelQuery() { + return new BooleanQuery(true); + } + + /** Add a MultiTermQuery term to the top-level query */ + protected void addClause(Query topLevel, Query subQuery) { + ((BooleanQuery)topLevel).add(subQuery, BooleanClause.Occur.SHOULD); + } + @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount()); @@ -504,7 +531,7 @@ }); final Term placeholderTerm = new Term(query.field); - final BooleanQuery bq = new BooleanQuery(true); + final Query bq = getTopLevelQuery(); final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]); ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { @@ -512,7 +539,7 @@ assert reader.docFreq(term) == st.docFreq; Query tq = getQuery(term, st.docFreq); tq.setBoost(query.getBoost() * st.boost); // set the boost - bq.add(tq, BooleanClause.Occur.SHOULD); // add to query + addClause(bq, tq); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); return bq; @@ -624,6 +651,72 @@ } } + /** + * A rewrite method that first translates each term into a SpanTermQuery in a + * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the + * scores as computed by the query. 
+ * + * @see #setRewriteMethod + */ + public static final class ScoringSpanBooleanQueryRewrite extends + ScoringBooleanQueryRewrite { + + @Override + protected Query getTopLevelQuery() { + return new SpanOrQuery(); + } + + @Override + protected void addClause(Query topLevel, Query subQuery) { + ((SpanOrQuery)topLevel).addClause((SpanTermQuery)subQuery); + } + + @Override + protected Query getQuery(Term term, int docCount) { + return new SpanTermQuery(term); + } + } + + /** + * A rewrite method that first translates each term into a SpanTermQuery in a + * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the + * scores as computed by the query. + * + *
+ * This rewrite method only uses the top scoring terms so it will not overflow
+ * the boolean max clause count.
+ *
+ * @see #setRewriteMethod
+ */
+ public static final class TopTermsSpanBooleanQueryRewrite extends TopTermsBooleanQueryRewrite {
+   /**
+    * Builds a rewrite that keeps at most {@code size} of the
+    * top scoring terms.
+    *
+    * NOTE: the effective limit is the smaller of {@code size} and
+    * {@link BooleanQuery#getMaxClauseCount}.
+    */
+   public TopTermsSpanBooleanQueryRewrite(int size) {
+     super(size);
+   }
+
+   @Override
+   protected Query getQuery(Term term, int docFreq) {
+     return new SpanTermQuery(term);
+   }
+
+   @Override
+   protected Query getTopLevelQuery() {
+     return new SpanOrQuery();
+   }
+
+   @Override
+   protected void addClause(Query topLevel, Query subQuery) {
+     final SpanOrQuery spanOr = (SpanOrQuery) topLevel;
+     spanOr.addClause((SpanTermQuery) subQuery);
+   }
+ }
+
private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable {
@Override
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
Index: lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (revision 1033939)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (working copy)
@@ -42,16 +42,20 @@
// copy clauses array into an ArrayList
this.clauses = new ArrayList extends SpanQuery {
+ /** The wrapped multi-term query; rewritten on demand by this wrapper. */
+ protected final Q query;
+ /** Rewrite method applied to the wrapped query when this wrapper is rewritten. */
+ private MultiTermQuery.RewriteMethod rewriteMethod;
+
+ /**
+  * Wraps the given multi-term query. The rewrite method starts out as a
+  * {@link MultiTermQuery.ScoringSpanBooleanQueryRewrite}.
+  *
+  * @param query the multi-term query to wrap
+  */
+ public SpanMultiTermQueryWrapper(Q query) {
+   this.query = query;
+   this.rewriteMethod = new MultiTermQuery.ScoringSpanBooleanQueryRewrite();
+ }
+
+ /**
+ * Expert: returns the rewrite method that this wrapper applies to the
+ * wrapped query when it is rewritten. Unless changed through
+ * setRewriteMethod, this is a MultiTermQuery.ScoringSpanBooleanQueryRewrite.
+ *
+ * @return the current rewrite method
+ */
+ public MultiTermQuery.RewriteMethod getRewriteMethod() {
+ return rewriteMethod;
+ }
+
+ /**
+ * Expert: sets the rewrite method that this wrapper applies to the wrapped
+ * query when it is rewritten. Only a span rewrite method makes sense here.
+ * NOTE(review): presumably because enclosing span queries need the rewritten
+ * result to be a SpanQuery; nothing in this method enforces that, confirm.
+ *
+ * @param rewriteMethod the rewrite method to use from now on
+ */
+ public void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) {
+ this.rewriteMethod = rewriteMethod;
+ }
+
+ /**
+  * Always throws: this wrapper produces no spans itself and has to be
+  * rewritten first.
+  *
+  * @throws UnsupportedOperationException on every call
+  */
+ @Override
+ public Spans getSpans(IndexReader reader) throws IOException {
+   final String message = "Query should have been rewritten";
+   throw new UnsupportedOperationException(message);
+ }
+
+ /**
+  * Returns the field of the wrapped query.
+  *
+  * @return whatever the wrapped query reports as its field
+  */
+ @Override
+ public String getField() {
+   final String wrappedField = query.getField();
+   return wrappedField;
+ }
+
+ /**
+  * Renders this query as {@code SpanMultiTermQueryWrapper(<wrapped>)}
+  * followed by the boost suffix from {@code ToStringUtils.boost}.
+  *
+  * @param field passed through to the wrapped query's {@code toString}
+  * @return the textual form of this wrapper
+  */
+ @Override
+ public String toString(String field) {
+   // evaluate the wrapped query first, then the boost suffix
+   final String wrapped = query.toString(field);
+   final String boostSuffix = ToStringUtils.boost(getBoost());
+   return "SpanMultiTermQueryWrapper(" + wrapped + ")" + boostSuffix;
+ }
+
+ /**
+  * Rewrites the wrapped multi-term query with this wrapper's rewrite method
+  * and sets this wrapper's boost on the result.
+  *
+  * The wrapped query's own rewrite method is replaced only for the duration
+  * of the call and put back afterwards, so rewriting this wrapper does not
+  * permanently change the query it was handed. The swap is not synchronized;
+  * NOTE(review): concurrent rewrites that share one wrapped query can still
+  * observe each other's rewrite method.
+  *
+  * @param reader passed through to the wrapped query's rewrite
+  * @return the rewritten query, carrying this wrapper's boost
+  * @throws IOException propagated from the wrapped query's rewrite
+  */
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+   final MultiTermQuery.RewriteMethod previous = query.getRewriteMethod();
+   query.setRewriteMethod(rewriteMethod);
+   final Query rewritten;
+   try {
+     rewritten = query.rewrite(reader);
+   } finally {
+     // restore the caller's rewrite method even when the rewrite throws
+     query.setRewriteMethod(previous);
+   }
+   rewritten.setBoost(getBoost());
+   return rewritten;
+ }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\spans\SpanMultiTermQueryWrapper.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (revision 1033939)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (working copy)
@@ -18,115 +18,24 @@
*/
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.ArrayList;
-
/**
* A SpanQuery version of {@link RegexQuery} allowing regular expression
* queries to be nested within other SpanQuery subclasses.
*/
-public class SpanRegexQuery extends SpanQuery implements RegexQueryCapable {
- private RegexCapabilities regexImpl = new JavaUtilRegexCapabilities();
- private Term term;
+public class SpanRegexQuery extends SpanMultiTermQueryWrapper