Index: lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (revision 0)
+++ lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (revision 0)
@@ -0,0 +1,92 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests for {@link SpanMultiTermQueryWrapper}, wrapping a wildcard and a fuzzy MultiTermQuery.
+ */
+public class TestSpanMultiTermQueryWrapper extends LuceneTestCase {
+  private Directory directory;
+  private IndexReader reader;
+  private Searcher searcher;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    directory = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, directory);
+    Document doc = new Document();
+    Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(field);
+
+    field.setValue("quick brown fox");
+    iw.addDocument(doc);
+    field.setValue("jumps over lazy broun dog");
+    iw.addDocument(doc);
+    field.setValue("jumps over extremely very lazy broxn dog");
+    iw.addDocument(doc);
+    reader = iw.getReader();
+    iw.close();
+    searcher = new IndexSearcher(reader);
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    searcher.close();
+    reader.close();
+    directory.close();
+    super.tearDown();
+  }
+
+  public void testWildcard() throws Exception {
+    WildcardQuery wq = new WildcardQuery(new Term("field", "bro?n"));
+    SpanQuery swq = new SpanMultiTermQueryWrapper<WildcardQuery>(wq);
+    // will only match quick brown fox
+    SpanFirstQuery sfq = new SpanFirstQuery(swq, 2);
+    assertEquals(1, searcher.search(sfq, 10).totalHits);
+  }
+
+  public void testFuzzy() throws Exception {
+    FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"));
+    SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq);
+    // will not match quick brown fox
+    SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 3, 6);
+    assertEquals(2, searcher.search(sprq, 10).totalHits);
+  }
+
+  public void testFuzzy2() throws Exception {
+    // maximum of 1 term expansion
+    FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"), 1f, 0, 1);
+    SpanQuery sfq = new SpanMultiTermQueryWrapper<FuzzyQuery>(fq);
+    // will only match jumps over lazy broun dog
+    SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 0, 100);
+    assertEquals(1, searcher.search(sprq, 10).totalHits);
+  }
+}

Property changes on: 
lucene\src\test\org\apache\lucene\search\spans\TestSpanMultiTermQueryWrapper.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1033939)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -32,6 +32,8 @@
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
@@ -299,13 +301,28 @@
   }
 
   private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite {
+    /** Return a suitable Query for a MultiTermQuery term. */
+    protected Query getQuery(Term term, int docCount) {
+      return new TermQuery(term, docCount);
+    }
+
+    /** Return a suitable top-level Query for holding all expanded terms. */
+    protected Query getTopLevelQuery() {
+      return new BooleanQuery(true);
+    }
+
+    /** Add the query built for one expanded term to the top-level query, as a SHOULD clause. */
+    protected void addClause(Query topLevel, Query subQuery) {
+      ((BooleanQuery)topLevel).add(subQuery, BooleanClause.Occur.SHOULD);
+    }
+
     @Override
     public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
       final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
       collectTerms(reader, query, col);
 
       final Term placeholderTerm = new Term(query.field);
-      final BooleanQuery result = new BooleanQuery(true);
+      final Query result = getTopLevelQuery();
       final int size = col.terms.size();
       if (size > 0) {
         final int sort[] = col.terms.sort(col.termsEnum.getComparator());
@@ -315,9 +332,9 @@
           final int pos = sort[i];
           final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef()));
           assert reader.docFreq(term) == docFreq[pos];
-          final TermQuery tq = new TermQuery(term, docFreq[pos]);
+          final Query tq = getQuery(term, docFreq[pos]);
           tq.setBoost(query.getBoost() * boost[pos]);
-          result.add(tq, BooleanClause.Occur.SHOULD);
+          addClause(result, tq);
         }
       }
       query.incTotalNumberOfTerms(size);
@@ -433,9 +450,24 @@
       this.size = size;
     }
 
+    /** Return the maximum priority queue size passed to the constructor. */
+    public int getSize() {
+      return size;
+    }
+
     /** Return a suitable Query for a MultiTermQuery term. */
     protected abstract Query getQuery(Term term, int docCount);
 
+    /** Return a suitable top-level Query for holding all expanded terms. */
+    protected Query getTopLevelQuery() {
+      return new BooleanQuery(true);
+    }
+
+    /** Add the query built for one expanded term to the top-level query, as a SHOULD clause. */
+    protected void addClause(Query topLevel, Query subQuery) {
+      ((BooleanQuery)topLevel).add(subQuery, BooleanClause.Occur.SHOULD);
+    }
+
     @Override
     public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
       final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
@@ -504,7 +536,7 @@
       });
 
       final Term placeholderTerm = new Term(query.field);
-      final BooleanQuery bq = new BooleanQuery(true);
+      final Query bq = getTopLevelQuery();
       final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
       ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
       for (final ScoreTerm st : scoreTerms) {
@@ -512,7 +544,7 @@
         assert reader.docFreq(term) == st.docFreq;
         Query tq = getQuery(term, st.docFreq);
         tq.setBoost(query.getBoost() * st.boost); // set the boost
-        bq.add(tq, BooleanClause.Occur.SHOULD); // add to query
+        addClause(bq, tq); // add to query
       }
       query.incTotalNumberOfTerms(scoreTerms.length);
       return bq;
@@ -624,6 +656,72 @@
     }
   }
 
+  /**
+   * A rewrite method that first translates each term into a
+   * {@link SpanTermQuery} clause of a {@link SpanOrQuery}, and keeps the
+   * scores as computed by the query.
+   *
+   * @see #setRewriteMethod
+   */
+  public static final class ScoringSpanBooleanQueryRewrite extends
+      ScoringBooleanQueryRewrite {
+
+    @Override
+    protected Query getTopLevelQuery() {
+      return new SpanOrQuery();
+    }
+
+    @Override
+    protected void addClause(Query topLevel, Query subQuery) {
+      ((SpanOrQuery)topLevel).addClause((SpanTermQuery)subQuery);
+    }
+
+    @Override
+    protected Query getQuery(Term term, int docCount) {
+      return new SpanTermQuery(term);
+    }
+  }
+
+  /**
+   * A rewrite method that first translates each term into a
+   * {@link SpanTermQuery} clause of a {@link SpanOrQuery}, and keeps the
+   * scores as computed by the query.
+   *
+   * <p>
+   * This rewrite method only uses the top scoring terms so it will not overflow
+   * the boolean max clause count.
+   *
+   * @see #setRewriteMethod
+   */
+  public static final class TopTermsSpanBooleanQueryRewrite extends
+      TopTermsBooleanQueryRewrite {
+    /**
+     * Create a TopTermsSpanBooleanQueryRewrite for
+     * at most <code>size</code> terms.
+     * <p>
+     * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than
+     * <code>size</code>, then it will be used instead.
+     */
+    public TopTermsSpanBooleanQueryRewrite(int size) {
+      super(size);
+    }
+
+    @Override
+    protected Query getTopLevelQuery() {
+      return new SpanOrQuery();
+    }
+
+    @Override
+    protected void addClause(Query topLevel, Query subQuery) {
+      ((SpanOrQuery)topLevel).addClause((SpanTermQuery)subQuery);
+    }
+
+    @Override
+    protected Query getQuery(Term term, int docFreq) {
+      return new SpanTermQuery(term);
+    }
+  }
+
   private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable {
     @Override
     public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
Index: lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (revision 1033939)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (working copy)
@@ -42,16 +42,20 @@
     // copy clauses array into an ArrayList
     this.clauses = new ArrayList(clauses.length);
     for (int i = 0; i < clauses.length; i++) {
-      SpanQuery clause = clauses[i];
-      if (i == 0) { // check field
-        field = clause.getField();
-      } else if (!clause.getField().equals(field)) {
-        throw new IllegalArgumentException("Clauses must have same field.");
-      }
-      this.clauses.add(clause);
+      addClause(clauses[i]);
     }
   }
 
+  /** Adds a clause to this query; throws IllegalArgumentException if its field differs from the query's field. */
+  public final void addClause(SpanQuery clause) {
+    if (field == null) {
+      field = clause.getField();
+    } else if (!clause.getField().equals(field)) {
+      throw new IllegalArgumentException("Clauses must have same field.");
+    }
+    this.clauses.add(clause);
+  }
+
   /** Return the clauses whose spans are matched.
*/
   public SpanQuery[] getClauses() {
     return clauses.toArray(new SpanQuery[clauses.size()]);
Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 0)
@@ -0,0 +1,116 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+
+/**
+ * Wraps any {@link MultiTermQuery} as a {@link SpanQuery},
+ * so it can be nested within other SpanQuery classes.
+ * <p>
+ * The query is rewritten by default to a {@link SpanOrQuery} containing
+ * the expanded terms, but this can be customized.
+ * <p>
+ * Example:
+ * <blockquote><pre>
+ * {@code
+ * WildcardQuery wildcard = new WildcardQuery(new Term("field", "bro?n"));
+ * SpanQuery spanWildcard = new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard);
+ * // do something with spanWildcard, such as use it in a SpanFirstQuery
+ * }
+ * </pre></blockquote>
+ */
+public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQuery {
+  protected final Q query;
+
+  /**
+   * Create a new SpanMultiTermQueryWrapper.
+   *
+   * @param query Query to wrap.
+   * <p>
+   * NOTE: This will call {@link MultiTermQuery#setRewriteMethod(MultiTermQuery.RewriteMethod)}
+   * on the wrapped query, changing its rewrite method to a suitable one for spans.
+   */
+  public SpanMultiTermQueryWrapper(Q query) {
+    this.query = query;
+
+    MultiTermQuery.RewriteMethod method = query.getRewriteMethod();
+    if (method instanceof MultiTermQuery.TopTermsBooleanQueryRewrite) {
+      int pqsize = ((MultiTermQuery.TopTermsBooleanQueryRewrite) method).getSize();
+      setRewriteMethod(new MultiTermQuery.TopTermsSpanBooleanQueryRewrite(pqsize));
+    } else {
+      setRewriteMethod(new MultiTermQuery.ScoringSpanBooleanQueryRewrite());
+    }
+  }
+
+  /**
+   * Expert: returns the rewriteMethod of the wrapped query
+   */
+  public final MultiTermQuery.RewriteMethod getRewriteMethod() {
+    return query.getRewriteMethod();
+  }
+
+  /**
+   * Expert: sets the rewrite method on the wrapped query. This only makes sense
+   * to be a span rewrite method.
+   */
+  public final void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) {
+    query.setRewriteMethod(rewriteMethod);
+  }
+
+  @Override
+  public Spans getSpans(IndexReader reader) throws IOException {
+    throw new UnsupportedOperationException("Query should have been rewritten");
+  }
+
+  @Override
+  public String getField() {
+    return query.getField();
+  }
+
+  @Override
+  public String toString(String field) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("SpanMultiTermQueryWrapper(");
+    builder.append(query.toString(field));
+    builder.append(")");
+    return builder.toString();
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    return query.rewrite(reader);
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * query.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (obj == null || getClass() != obj.getClass()) return false;
+    // two wrappers of the same class are equal exactly when their wrapped queries are equal
+    return query.equals(((SpanMultiTermQueryWrapper<?>) obj).query);
+  }
+}

Property changes on: lucene\src\java\org\apache\lucene\search\spans\SpanMultiTermQueryWrapper.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java
===================================================================
--- 
lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java (revision 1033939)
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java (working copy)
@@ -29,6 +29,7 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MultiSearcher;
 import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.store.Directory;
@@ -74,6 +75,65 @@
     writer.close();
 
     IndexSearcher searcher = new IndexSearcher(directory, true);
+    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "aut.*")));
+    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
+    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
+    // true);
+    int numHits = searcher.search(sfq, null, 1000).totalHits;
+    assertEquals(1, numHits);
+    searcher.close();
+    directory.close();
+  }
+
+  public void testSpanRegexBug() throws CorruptIndexException, IOException {
+    createRAMDirectories();
+
+    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "a.*")));
+    SpanQuery stq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "b.*")));
+    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6,
+        true);
+
+    // 1. Search the same store which works
+    IndexSearcher[] arrSearcher = new IndexSearcher[2];
+    arrSearcher[0] = new IndexSearcher(indexStoreA, true);
+    arrSearcher[1] = new IndexSearcher(indexStoreB, true);
+    MultiSearcher searcher = new MultiSearcher(arrSearcher);
+    int numHits = searcher.search(query, null, 1000).totalHits;
+    arrSearcher[0].close();
+    arrSearcher[1].close();
+
+    // Will fail here
+    // We expect 2 but only one matched
+    // The rewriter function only rewrites the query once, on the first IndexSearcher
+    // So it's using term: a1 b1 to search on the second IndexSearcher
+    // As a result, it won't match the document in the second IndexSearcher
+    assertEquals(2, numHits);
+    indexStoreA.close();
+    indexStoreB.close();
+  }
+
+  /** remove in lucene 4.0 */
+  @Deprecated
+  public void testSpanRegexOld() throws Exception {
+    Directory directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer()));
+    Document doc = new Document();
+    // doc.add(newField("field", "the quick brown fox jumps over the lazy dog",
+    // Field.Store.NO, Field.Index.ANALYZED));
+    // writer.addDocument(doc);
+    // doc = new Document();
+    doc.add(newField("field", "auto update", Field.Store.NO,
+        Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    doc = new Document();
+    doc.add(newField("field", "first auto update", Field.Store.NO,
+        Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+
+    IndexSearcher searcher = new IndexSearcher(directory, true);
     SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
     SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
     // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
@@ -84,7 +144,9 @@
     directory.close();
   }
 
-  public void testSpanRegexBug() throws CorruptIndexException, IOException {
+  /** remove in lucene 4.0 */
+  @Deprecated
+  public void testSpanRegexBugOld() throws CorruptIndexException, IOException {
     createRAMDirectories();
 
     SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "a.*"));
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (revision 1033939)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (working copy)
@@ -18,115 +18,26 @@
  */
 
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
 
-import java.io.IOException;
-import java.util.Collection;
-import java.util.ArrayList;
-
 /**
  * A SpanQuery version of {@link RegexQuery} allowing regular expression
  * queries to be nested within other SpanQuery subclasses.
+ * @deprecated Use <code>new SpanMultiTermQueryWrapper&lt;RegexQuery&gt;(new RegexQuery())</code> instead.
+ * This query will be removed in Lucene 4.0
  */
-public class SpanRegexQuery extends SpanQuery implements RegexQueryCapable {
-  private RegexCapabilities regexImpl = new JavaUtilRegexCapabilities();
-  private Term term;
+@Deprecated
+public class SpanRegexQuery extends SpanMultiTermQueryWrapper<RegexQuery> implements RegexQueryCapable {
 
   public SpanRegexQuery(Term term) {
-    this.term = term;
+    super(new RegexQuery(term));
   }
 
-  public Term getTerm() { return term; }
-
-  @Override
-  public Query rewrite(IndexReader reader) throws IOException {
-    RegexQuery orig = new RegexQuery(term);
-    orig.setRegexImplementation(regexImpl);
-    orig.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
-    BooleanQuery bq = (BooleanQuery) orig.rewrite(reader);
-
-    BooleanClause[] clauses = bq.getClauses();
-    SpanQuery[] sqs = new SpanQuery[clauses.length];
-    for (int i = 0; i < clauses.length; i++) {
-      BooleanClause clause = clauses[i];
-
-      // Clauses from RegexQuery.rewrite are always TermQuery's
-      TermQuery tq = (TermQuery) clause.getQuery();
-
-      sqs[i] = new SpanTermQuery(tq.getTerm());
-      sqs[i].setBoost(tq.getBoost());
-    }
-
-    SpanOrQuery query = new SpanOrQuery(sqs);
-    query.setBoost(orig.getBoost());
-
-    return query;
-  }
-
-  @Override
-  public Spans getSpans(IndexReader reader) throws IOException {
-    throw new UnsupportedOperationException("Query should have been rewritten");
-  }
-
-  @Override
-  public String getField() {
-    return term.field();
-  }
-
-  public Collection getTerms() {
-    Collection terms = new ArrayList();
-    terms.add(term);
-    return terms;
-  }
-
-  /* generated by IntelliJ IDEA */
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) return true;
-    if (o == null || getClass() != o.getClass()) return false;
-
-    final SpanRegexQuery that = (SpanRegexQuery) o;
-
-    if (!regexImpl.equals(that.regexImpl)) return false;
-    if (!term.equals(that.term)) return false;
-
-    return true;
-  }
-
-  /* generated by IntelliJ IDEA */
-  @Override
-  public int hashCode() {
-    int result;
-    result = regexImpl.hashCode();
-    result = 29 * result + term.hashCode();
-    return result;
-  }
-
-  @Override
-  public String toString(String field) {
-    StringBuilder buffer = new StringBuilder();
-    buffer.append("spanRegexQuery(");
-    buffer.append(term);
-    buffer.append(")");
-    buffer.append(ToStringUtils.boost(getBoost()));
-    return buffer.toString();
-  }
-
   public void setRegexImplementation(RegexCapabilities impl) {
-    this.regexImpl = impl;
+    query.setRegexImplementation(impl);
   }
 
   public RegexCapabilities getRegexImplementation() {
-    return regexImpl;
+    return query.getRegexImplementation();
   }
 }