Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 795767)
+++ CHANGES.txt (working copy)
@@ -66,11 +66,12 @@
Changes in runtime behavior
- 1. LUCENE-1424: QueryParser now by default uses constant score query
+ 1. LUCENE-1424: QueryParser now by default uses constant score auto
rewriting when it generates a WildcardQuery and PrefixQuery (it
- already does so for RangeQuery, as well). Call
- setConstantScoreRewrite(false) to revert to BooleanQuery rewriting
- method. (Mark Miller via Mike McCandless)
+ already does so for TermRangeQuery, as well). Call
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
+ to revert to the slower BooleanQuery rewriting method. (Mark Miller via Mike
+ McCandless)
2. LUCENE-1575: As of 2.9, the core collectors as well as
IndexSearcher's search methods that return top N results, no
@@ -296,10 +297,10 @@
includes more detailed status than previously. (Tim Smith via
Mike McCandless)
-28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed
- to TermRangeQuery and TermRangeFilter. TermRangeQuery is in
- constant score rewrite mode by default. The new classes also have
- new ctors taking field and term ranges as Strings (see also
+28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed to
+ TermRangeQuery and TermRangeFilter. TermRangeQuery is in constant
+ score auto rewrite mode by default. The new classes also have new
+ ctors taking field and term ranges as Strings (see also
LUCENE-1424). (Uwe Schindler)
29. LUCENE-1609: The termInfosIndexDivisor must now be specified
@@ -434,7 +435,7 @@
6. LUCENE-1424: Moved constant score query rewrite capability into
MultiTermQuery, allowing TermRangeQuery, PrefixQuery and WildcardQuery
to switch betwen constant-score rewriting or BooleanQuery
- expansion rewriting via a new setConstantScoreRewrite method.
+ expansion rewriting via a new setRewriteMethod method.
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
McCandless)
@@ -579,6 +580,16 @@
On 32 bit platforms, the address space can be very fragmented, so
one big ByteBuffer for the whole file may not fit into address space.
(Eks Dev via Uwe Schindler)
+
+33. LUCENE-1644: Enable 4 rewrite modes for queries deriving from
+ MultiTermQuery (WildcardQuery, PrefixQuery, TermRangeQuery,
+ NumericRangeQuery): CONSTANT_SCORE_FILTER_REWRITE first creates a
+ filter and then assigns constant score (boost) to docs;
+ CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE creates a BooleanQuery but
+ uses a constant score (boost); SCORING_BOOLEAN_QUERY_REWRITE also
+ creates a BooleanQuery but keeps the BooleanQuery's scores;
+ CONSTANT_SCORE_AUTO_REWRITE_DEFAULT tries to pick the most performant
+ constant-score rewrite method. (Mike McCandless)
Optimizations
Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 795767)
+++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -46,6 +46,7 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -433,11 +434,11 @@
public void testRange() throws Exception {
assertQueryEquals("[ a TO z]", null, "[a TO z]");
- assertTrue(((TermRangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite());
+ assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
- qp.setConstantScoreRewrite(false);
- assertFalse(((TermRangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite());
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());
assertQueryEquals("[ a TO z ]", null, "[a TO z]");
assertQueryEquals("{ a TO z}", null, "{a TO z}");
@@ -476,7 +477,7 @@
// supported).
// Test ConstantScoreRangeQuery
- qp.setConstantScoreRewrite(true);
+ qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
@@ -484,7 +485,7 @@
assertEquals("The index Term should be included.", 1, result.length);
// Test TermRangeQuery
- qp.setConstantScoreRewrite(false);
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
Index: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
===================================================================
--- src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 795767)
+++ src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy)
@@ -88,29 +88,35 @@
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
+ public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
+ TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
+ query.setRewriteMethod(method);
+ return query;
+ }
+
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il,
boolean ih, Collator c) {
TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/** macro for readability */
public static Query cspq(Term prefix) {
PrefixQuery query = new PrefixQuery(prefix);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/** macro for readability */
public static Query cswcq(Term wild) {
WildcardQuery query = new WildcardQuery(wild);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
@@ -156,6 +162,14 @@
result[i].score);
}
+ result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).scoreDocs;
+ numHits = result.length;
+ assertEquals("wrong number of results", 6, numHits);
+ for (int i = 0; i < numHits; i++) {
+ assertEquals("score for " + i + " was not the same", score,
+ result[i].score);
+ }
+
}
public void testBoost() throws IOException {
@@ -201,6 +215,18 @@
assertEquals(0, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
+ q1 = csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0
+ q1.setBoost(.1f);
+ q2 = csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1
+ bq = new BooleanQuery(true);
+ bq.add(q1, BooleanClause.Occur.SHOULD);
+ bq.add(q2, BooleanClause.Occur.SHOULD);
+
+ hits = search.search(bq, null, 1000).scoreDocs;
+ assertEquals(1, hits[0].doc);
+ assertEquals(0, hits[1].doc);
+ assertTrue(hits[0].score > hits[1].score);
+
q1 = csrq("data", "A", "A", T, T); // matches document #0
q1.setBoost(10f);
q2 = csrq("data", "Z", "Z", T, T); // matches document #1
@@ -268,21 +294,39 @@
result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
+ result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("find all", numDocs, result.length);
+
result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
+ result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("all but last", numDocs - 1, result.length);
+
result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
+ result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("all but first", numDocs - 1, result.length);
+
result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
+ result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("all but ends", numDocs - 2, result.length);
+
result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
+ result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("med and up", 1 + maxId - medId, result.length);
+
result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
+ result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("up to med", 1 + medId - minId, result.length);
+
// unbounded id
result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs;
@@ -307,24 +351,51 @@
result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
+
+ result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("min,min,F,F", 0, result.length);
+
result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
+
+ result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("med,med,F,F", 0, result.length);
+
result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
+ result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("max,max,F,F", 0, result.length);
+
result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
+
+ result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("min,min,T,T", 1, result.length);
+
result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
+ result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("nul,min,F,T", 1, result.length);
+
result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
+
+ result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("max,max,T,T", 1, result.length);
+
result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
+ result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("max,nul,T,T", 1, result.length);
+
result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
+ result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("med,med,T,T", 1, result.length);
}
public void testRangeQueryIdCollating() throws IOException {
Index: src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
===================================================================
--- src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 795767)
+++ src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy)
@@ -25,7 +25,6 @@
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
-import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
@@ -104,13 +103,13 @@
switch (i) {
case 0:
type = " (constant score)";
- q.setConstantScoreRewrite(true);
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
- q.setConstantScoreRewrite(false);
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
@@ -229,7 +228,7 @@
// test inclusive range
NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -238,7 +237,7 @@
// test exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -247,7 +246,7 @@
// test left exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -256,7 +255,7 @@
// test right exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
Index: src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
===================================================================
--- src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 795767)
+++ src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy)
@@ -25,7 +25,6 @@
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
-import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
@@ -108,13 +107,13 @@
switch (i) {
case 0:
type = " (constant score)";
- q.setConstantScoreRewrite(true);
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
- q.setConstantScoreRewrite(false);
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
@@ -245,7 +244,7 @@
// test inclusive range
NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, true);
TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -254,7 +253,7 @@
// test exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, false);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -263,7 +262,7 @@
// test left exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, true);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -272,7 +271,7 @@
// test right exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, false);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
Index: src/java/org/apache/lucene/queryParser/Token.java
===================================================================
--- src/java/org/apache/lucene/queryParser/Token.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/Token.java (working copy)
@@ -121,4 +121,4 @@
}
}
-/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
+/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/TokenMgrError.java
===================================================================
--- src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy)
@@ -137,4 +137,4 @@
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */
+/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -25,6 +25,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -118,7 +119,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
- boolean constantScoreRewrite= true;
+ MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@@ -331,40 +332,48 @@
}
/**
- * @deprecated Please use {@link #setConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #setMultiTermRewriteMethod} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
- constantScoreRewrite = !useOldRangeQuery;
+ if (useOldRangeQuery) {
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ } else {
+ setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+ }
}
/**
- * @deprecated Please use {@link #getConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #getMultiTermRewriteMethod} instead.
*/
public boolean getUseOldRangeQuery() {
- return !constantScoreRewrite;
+ if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
+ return true;
+ } else {
+ return false;
+ }
}
/**
- * By default QueryParser uses constant-score rewriting
+ * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
- * points are not relevant then set this option to true
- * Default is false.
+ * points are not relevant then use this to change
+ * the rewrite method.
*/
- public void setConstantScoreRewrite(boolean v) {
- constantScoreRewrite = v;
+ public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
+ multiTermRewriteMethod = method;
}
/**
- * @see #setConstantScoreRewrite(boolean)
+ * @see #setMultiTermRewriteMethod
*/
- public boolean getConstantScoreRewrite() {
- return constantScoreRewrite;
+ public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
+ return multiTermRewriteMethod;
}
/**
@@ -858,7 +867,7 @@
*/
protected Query newPrefixQuery(Term prefix){
PrefixQuery query = new PrefixQuery(prefix);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -884,7 +893,7 @@
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -903,7 +912,7 @@
*/
protected Query newWildcardQuery(Term t) {
WildcardQuery query = new WildcardQuery(t);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 795767)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -49,6 +49,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -142,7 +143,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
- boolean constantScoreRewrite= true;
+ MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@@ -355,40 +356,48 @@
}
/**
- * @deprecated Please use {@link #setConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #setMultiTermRewriteMethod} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
- constantScoreRewrite = !useOldRangeQuery;
+ if (useOldRangeQuery) {
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ } else {
+ setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+ }
}
/**
- * @deprecated Please use {@link #getConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #getMultiTermRewriteMethod} instead.
*/
public boolean getUseOldRangeQuery() {
- return !constantScoreRewrite;
+ if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
+ return true;
+ } else {
+ return false;
+ }
}
/**
- * By default QueryParser uses constant-score rewriting
+ * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
- * points are not relevant then set this option to true
- * Default is false.
+ * points are not relevant then use this to change
+ * the rewrite method.
*/
- public void setConstantScoreRewrite(boolean v) {
- constantScoreRewrite = v;
+ public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
+ multiTermRewriteMethod = method;
}
/**
- * @see #setConstantScoreRewrite(boolean)
+ * @see #setMultiTermRewriteMethod
*/
- public boolean getConstantScoreRewrite() {
- return constantScoreRewrite;
+ public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
+ return multiTermRewriteMethod;
}
/**
@@ -882,7 +891,7 @@
*/
protected Query newPrefixQuery(Term prefix){
PrefixQuery query = new PrefixQuery(prefix);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -908,7 +917,7 @@
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -927,7 +936,7 @@
*/
protected Query newWildcardQuery(Term t) {
WildcardQuery query = new WildcardQuery(t);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
Index: src/java/org/apache/lucene/queryParser/CharStream.java
===================================================================
--- src/java/org/apache/lucene/queryParser/CharStream.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy)
@@ -109,4 +109,4 @@
void Done();
}
-/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
+/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/ParseException.java
===================================================================
--- src/java/org/apache/lucene/queryParser/ParseException.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/ParseException.java (working copy)
@@ -195,4 +195,4 @@
}
}
-/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
+/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java (working copy)
@@ -29,6 +29,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@@ -77,9 +78,9 @@
public Query parse(String query) throws ParseException {
if (isPass2ResolvingPhrases) {
- boolean oldConstantScoreRewriteSetting = getConstantScoreRewrite();
+ MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod();
try {
- // Temporarily set constantScoreRewrite to false so that Parser will
+ // Temporarily force BooleanQuery rewrite so that Parser will
// generate visible
// collection of terms which we can convert into SpanQueries.
// ConstantScoreRewrite mode produces an
@@ -88,10 +89,10 @@
// QueryParser is not guaranteed threadsafe anyway so this temporary
// state change should not
// present an issue
- setConstantScoreRewrite(false);
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return super.parse(query);
} finally {
- setConstantScoreRewrite(oldConstantScoreRewriteSetting);
+ setMultiTermRewriteMethod(oldMethod);
}
}
@@ -165,7 +166,7 @@
// that can be turned into SpanOr clause
TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive,
getRangeCollator());
- rangeQuery.setConstantScoreRewrite(false);;
+ rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return rangeQuery;
}
return super.newRangeQuery(field, part1, part2, inclusive);
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
Index: src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -18,6 +18,10 @@
*/
import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -34,24 +38,288 @@
* FilteredTermEnum} that iterates through the terms to be
* matched.
*
- *
NOTE: if {@link #setConstantScoreRewrite} is - * false, you may encounter a {@link - * BooleanQuery.TooManyClauses} exception during searching, - * which happens when the number of terms to be searched - * exceeds {@link BooleanQuery#getMaxClauseCount()}. - * Setting {@link #setConstantScoreRewrite} to false + *
NOTE: if {@link #setRewriteMethod} is either + * {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link + * #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a + * {@link BooleanQuery.TooManyClauses} exception during + * searching, which happens when the number of terms to be + * searched exceeds {@link + * BooleanQuery#getMaxClauseCount()}. Setting {@link + * #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE} * prevents this. * - * Note that {@link QueryParser} by default produces - * MultiTermQueries with {@link #setConstantScoreRewrite} - * true. + *
The recommended rewrite method is {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU + * computing unhelpful scores, and it tries to pick the most + * performant rewrite method given the query. + * + * Note that {@link QueryParser} produces + * MultiTermQueries using {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default. */ public abstract class MultiTermQuery extends Query { /* @deprecated move to sub class */ protected Term term; - protected boolean constantScoreRewrite = false; + protected RewriteMethod rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE; transient int numberOfTerms = 0; + // nocommit + private static boolean first=true; + + /** Abstract class that defines how the query is rewritten. */ + public static abstract class RewriteMethod implements Serializable { + public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; + } + + private static final class ConstantScoreFilterRewrite extends RewriteMethod implements Serializable { + public Query rewrite(IndexReader reader, MultiTermQuery query) { + Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.setBoost(query.getBoost()); + return result; + } + + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return CONSTANT_SCORE_FILTER_REWRITE; + } + } + + /** A rewrite method that first creates a private Filter, + * by visiting each term in sequence and marking all docs + * for that term. Matching documents are assigned a + * constant score equal to the query's boost. + * + *
This method is faster than the BooleanQuery + * rewrite methods when the number of matched terms or + * matched documents is non-trivial. Also, it will never + * hit an errant {@link BooleanQuery.TooManyClauses} + * exception. + * + * @see #setRewriteMethod */ + public final static RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite(); + + private static class ScoringBooleanQueryRewrite extends RewriteMethod implements Serializable { + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + + FilteredTermEnum enumerator = query.getEnum(reader); + BooleanQuery result = new BooleanQuery(true); + int count = 0; + try { + do { + Term t = enumerator.term(); + if (t != null) { + count++; + TermQuery tq = new TermQuery(t); // found a match + tq.setBoost(query.getBoost() * enumerator.difference()); // set the boost + result.add(tq, BooleanClause.Occur.SHOULD); // add to query + } + } while (enumerator.next()); + if (first) { + System.out.println("bq terms=" + count); + first = false; + } + } finally { + enumerator.close(); + } + query.incTotalNumberOfTerms(count); + return result; + } + + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return SCORING_BOOLEAN_QUERY_REWRITE; + } + } + + /** A rewrite method that first translates each term into + * {@link BooleanClause.Occur#SHOULD} clause in a + * BooleanQuery, and keeps the scores as computed by the + * query. Note that typically such scores are + * meaningless to the user, and require non-trivial CPU + * to compute, so it's almost always better to use {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. + * + *
NOTE: This rewrite method will hit {@link + * BooleanQuery.TooManyClauses} if the number of terms + * exceeds {@link BooleanQuery#getMaxClauseCount}. + * + * @see #setRewriteMethod */ + public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite(); + + private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + // strip the scores off + Query result = new ConstantScoreQuery(new QueryWrapperFilter(super.rewrite(reader, query))); + result.setBoost(query.getBoost()); + return result; + } + + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + } + } + + /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except + * scores are not computed. Instead, each matching + * document receives a constant score equal to the + * query's boost. + * + *
NOTE: This rewrite method will hit {@link
+ * BooleanQuery.TooManyClauses} if the number of terms
+ * exceeds {@link BooleanQuery#getMaxClauseCount}.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite();
+
+
+  /** A rewrite method that tries to pick the best
+   *  constant-score rewrite method based on term and
+   *  document counts from the query.  If both the number of
+   *  terms and the number of documents are small enough, then
+   *  the query is rewritten the way {@link
+   *  #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} would do it.
+   *  Otherwise, it is rewritten the way {@link
+   *  #CONSTANT_SCORE_FILTER_REWRITE} would do it.
+   */
+  public static class ConstantScoreAutoRewrite extends RewriteMethod implements Serializable {
+
+    /** Initial value of {@link #getTermCountCutoff} for new instances. */
+    public static int DEFAULT_TERM_COUNT_CUTOFF = 10000;
+
+    /** Initial value of {@link #getDocCountPercent} for new instances. */
+    public static int DEFAULT_DOC_COUNT_PERCENT = 1;
+
+    private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
+    private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
+
+    /** If the number of terms in this query is equal to or
+     *  larger than this setting then the filter rewrite is
+     *  used.  The effective limit is additionally capped by
+     *  {@link BooleanQuery#getMaxClauseCount()}.
+     *  @param count term count at which the filter rewrite kicks in */
+    public void setTermCountCutoff(int count) {
+      termCountCutoff = count;
+    }
+
+    /** @return the configured term count cutoff
+     *  @see #setTermCountCutoff */
+    public int getTermCountCutoff() {
+      return termCountCutoff;
+    }
+
+    /** If the number of documents to be visited in the
+     *  postings reaches or exceeds this specified percentage
+     *  of the maxDoc() for the index, then the filter rewrite
+     *  is used.
+     *  @param percent 0.0 to 100.0 */
+    public void setDocCountPercent(double percent) {
+      docCountPercent = percent;
+    }
+
+    /** @return the configured document count percentage
+     *  @see #setDocCountPercent */
+    public double getDocCountPercent() {
+      return docCountPercent;
+    }
+
+    /** Enumerates the query's terms, summing their docFreq,
+     *  until either cutoff is reached (rewrite to a
+     *  constant-score filter) or the enumeration is
+     *  exhausted (rewrite to a constant-score BooleanQuery
+     *  over the collected terms).  The result always carries
+     *  the boost of the original query.
+     *
+     *  @param reader index to enumerate terms and document frequencies from
+     *  @param query the query being rewritten
+     *  @return the constant-score rewritten query
+     *  @throws IOException if reading the terms fails */
+    public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+      // Get the enum and start visiting terms.  If we
+      // exhaust the enum before hitting either of the
+      // cutoffs, we build a constant-score BooleanQuery;
+      // else, a constant-score filter:
+      final Collection pendingTerms = new ArrayList();
+      final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
+      // Never collect more terms than a BooleanQuery is allowed to hold
+      final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
+      int docVisitCount = 0;
+
+      FilteredTermEnum enumerator = query.getEnum(reader);
+      try {
+        while(true) {
+          Term t = enumerator.term();
+          if (t != null) {
+            pendingTerms.add(t);
+            // Loading the TermInfo from the terms dict here
+            // should not be costly, because 1) the
+            // query/filter will load the TermInfo when it
+            // runs, and 2) the terms dict has a cache:
+            docVisitCount += reader.docFreq(t);
+          }
+
+          if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
+            // Too many terms or too many docs -- make a filter.
+            Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
+            result.setBoost(query.getBoost());
+            return result;
+          } else if (!enumerator.next()) {
+            // Enumeration is done, and we hit a small
+            // enough number of terms & docs -- just make a
+            // BooleanQuery, now
+            Iterator it = pendingTerms.iterator();
+            BooleanQuery bq = new BooleanQuery(true);
+            while(it.hasNext()) {
+              TermQuery tq = new TermQuery((Term) it.next());
+              bq.add(tq, BooleanClause.Occur.SHOULD);
+            }
+            // Strip scores
+            Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+            result.setBoost(query.getBoost());
+            // Record the visited terms, as the scoring
+            // BooleanQuery rewrite does
+            query.incTotalNumberOfTerms(pendingTerms.size());
+            return result;
+          }
+        }
+      } finally {
+        enumerator.close();
+      }
+    }
+
+    /** Hash consistent with {@link #equals}: combines the
+     *  term count cutoff with the doc count percentage. */
+    public int hashCode() {
+      final int prime = 1279;
+      // Fold the high word into the low word before narrowing:
+      // for many common doubles (1.0, 0.5, 2.0, ...) the low 32
+      // bits are all zero, so a plain (int) cast would drop the
+      // percentage from the hash entirely.
+      final long docBits = Double.doubleToLongBits(docCountPercent);
+      return prime * termCountCutoff + (int) (docBits ^ (docBits >>> 32));
+    }
+
+    /** Two instances are equal when they are of exactly the
+     *  same class and have the same term count cutoff and
+     *  doc count percentage. */
+    public boolean equals(Object obj) {
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (getClass() != obj.getClass())
+        return false;
+
+      ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
+      if (other.termCountCutoff != termCountCutoff) {
+        return false;
+      }
+
+      // Compare bit patterns so the result agrees with hashCode
+      if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
+        return false;
+      }
+
+      return true;
+    }
+  }
+
+  /** Read-only default instance of {@link
+   *  ConstantScoreAutoRewrite}, with {@link
+   *  ConstantScoreAutoRewrite#setTermCountCutoff} set to
+   *  {@link
+   *  ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
+   *  and {@link
+   *  ConstantScoreAutoRewrite#setDocCountPercent} set to
+   *  {@link
+   *  ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}.
+   *  Note that you cannot alter the configuration of this
+   *  instance: both setters throw {@link
+   *  UnsupportedOperationException}; you'll need to create
+   *  a private instance instead. */
+  public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite() {
+    public void setTermCountCutoff(int count) {
+      throw new UnsupportedOperationException("Please create a private instance");
+    }
+
+    public void setDocCountPercent(double percent) {
+      throw new UnsupportedOperationException("Please create a private instance");
+    }
+
+    // Make sure we are still a singleton even after deserializing,
+    // so that identity comparisons against this constant keep working
+    protected Object readResolve() {
+      return CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+    }
+  };
+
/** Constructs a query for terms matching term. */
public MultiTermQuery(Term term) {
this.term = term;
@@ -105,33 +373,12 @@
numberOfTerms = 0;
}
- protected Filter getFilter() {
- return new MultiTermQueryWrapperFilter(this);
+ protected void incTotalNumberOfTerms(int inc) {
+ numberOfTerms += inc;
}
public Query rewrite(IndexReader reader) throws IOException {
- if (!constantScoreRewrite) {
- FilteredTermEnum enumerator = getEnum(reader);
- BooleanQuery query = new BooleanQuery(true);
- try {
- do {
- Term t = enumerator.term();
- if (t != null) {
- numberOfTerms++;
- TermQuery tq = new TermQuery(t); // found a match
- tq.setBoost(getBoost() * enumerator.difference()); // set the boost
- query.add(tq, BooleanClause.Occur.SHOULD); // add to query
- }
- } while (enumerator.next());
- } finally {
- enumerator.close();
- }
- return query;
- } else {
- Query query = new ConstantScoreQuery(getFilter());
- query.setBoost(getBoost());
- return query;
- }
+ return rewriteMethod.rewrite(reader, this);
}
@@ -155,34 +402,18 @@
}
/**
- * @see #setConstantScoreRewrite
+ * @see #setRewriteMethod
*/
- public boolean getConstantScoreRewrite() {
- return constantScoreRewrite;
+ public RewriteMethod getRewriteMethod() {
+ return rewriteMethod;
}
/**
- * This method determines what method is used during searching:
- *
false
- * (the default), the query is rewritten to {@link
- * BooleanQuery} with one clause for each term in the
- * range. If the the number of terms in the range
- * exceeds {@link BooleanQuery#getMaxClauseCount()}, a
- * {@link BooleanQuery.TooManyClauses} exception will be
- * thrown during searching. This mode may also give
- * worse performance when the number of terms is large,
- * and/or the number of matching documents is large.
- *
- * true,
- * the query is first rewritten to a filter. Matching
- * documents will identical scores, equal to this
- * query's boost.
- * MultiTermQueryWrapperFilter.
* This class also provides the functionality behind
- * {@link MultiTermQuery#getFilter}, this is why it is not abstract.
+ * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE};
+ * this is why it is not abstract.
*/
public class MultiTermQueryWrapperFilter extends Filter {
@@ -96,6 +97,8 @@
abstract class TermGenerator {
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
+ final int[] docs = new int[32];
+ final int[] freqs = new int[32];
TermDocs termDocs = reader.termDocs();
try {
do {
@@ -104,8 +107,15 @@
break;
query.numberOfTerms++;
termDocs.seek(term);
- while (termDocs.next()) {
- handleDoc(termDocs.doc());
+ while (true) {
+ final int count = termDocs.read(docs, freqs);
+ if (count != 0) {
+ for(int i=0;iThis query uses {@linkplain
+ * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}. If you
+ * want to change this, use the new {@link TermRangeQuery}
+ * instead.
*
* @deprecated Use {@link TermRangeQuery} for term ranges or
* {@link NumericRangeQuery} for numeric ranges instead.
@@ -93,7 +94,7 @@
inclusive, inclusive,
collator
);
- delegate.setConstantScoreRewrite(false);
+ delegate.setRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE);
}
public void setBoost(float b) {
Index: src/java/org/apache/lucene/search/PrefixQuery.java
===================================================================
--- src/java/org/apache/lucene/search/PrefixQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy)
@@ -24,7 +24,15 @@
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
- * is built by QueryParser for input like app*. */
+ * is built by QueryParser for input like app*.
+ *
+ *
NOTE: Currently this query uses {@link + * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which + * assigns not-very-useful scores to the resulting hits. In + * 3.0 this default will change to {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you + * can use {@link MultiTermQuery#setRewriteMethod} to change + * it. */ public class PrefixQuery extends MultiTermQuery { private Term prefix; Index: src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (working copy) @@ -29,9 +29,8 @@ * supplied range according to {@link String#compareTo(String)}. It is not intended * for numerical ranges, use {@link NumericRangeQuery} instead. * - *
This query is in - * {@linkplain MultiTermQuery#setConstantScoreRewrite(boolean) constant score rewrite mode}. - * If you want to change this, use the new {@link TermRangeQuery} instead. + *
This query is hardwired to {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}. + * If you want to change this, use {@link TermRangeQuery} instead. * * @deprecated Use {@link TermRangeQuery} for term ranges or * {@link NumericRangeQuery} for numeric ranges instead. @@ -44,14 +43,14 @@ public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { super(fieldName, lowerVal, upperVal, includeLower, includeUpper); - this.constantScoreRewrite = true; + rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper, Collator collator) { super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator); - this.constantScoreRewrite = true; + rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } public String getLowerVal() { @@ -63,8 +62,9 @@ } /** Changes of mode are not supported by this class (fixed to constant score rewrite mode) */ - public void setConstantScoreRewrite(boolean constantScoreRewrite) { - if (!constantScoreRewrite) - throw new UnsupportedOperationException("Use TermRangeQuery instead to enable boolean query rewrite."); + public void setRewriteMethod(RewriteMethod method) { + if (method != CONSTANT_SCORE_FILTER_REWRITE) { + throw new UnsupportedOperationException("Use TermRangeQuery instead to change the rewrite method."); + } } } Index: src/java/org/apache/lucene/search/NumericRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy) @@ -123,12 +123,12 @@ * *
Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed * that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) - * took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs + * took about 30-40 secs to complete, {@link TermRangeQuery} in filter rewrite mode took 5 secs * and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit * precision step). This query type was developed for a geographic portal, where the performance for * e.g. bounding boxes or exact date/time stamps is important.
* - *The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}. + *
The query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} * With precision steps of ≤4, this query can be run in conventional {@link BooleanQuery} * rewrite mode without changing the max clause count. * @@ -152,7 +152,7 @@ this.max = max; this.minInclusive = minInclusive; this.maxInclusive = maxInclusive; - setConstantScoreRewrite(true); + setRewriteMethod(CONSTANT_SCORE_FILTER_REWRITE); } /** Index: src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import java.text.Collator; -import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.ToStringUtils; @@ -31,9 +30,9 @@ * supplied range according to {@link String#compareTo(String)}. It is not intended * for numerical ranges, use {@link NumericRangeQuery} instead. * - *
This query is in constant score mode per default. - * See {@link MultiTermQuery#setConstantScoreRewrite} for the tradeoffs between - * enabling and disabling constantScoreRewrite mode. + *
This query uses the {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * rewrite method.
* @since 2.9
*/
@@ -110,7 +109,7 @@
this.includeLower = includeLower;
this.includeUpper = includeUpper;
this.collator = collator;
- this.constantScoreRewrite = true;
+ rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
/** Returns the field name for this query */
Index: src/java/org/apache/lucene/search/WildcardQuery.java
===================================================================
--- src/java/org/apache/lucene/search/WildcardQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/WildcardQuery.java (working copy)
@@ -30,8 +30,14 @@
* a Wildcard term should not start with one of the wildcards * or
* ?.
*
- * @see WildcardTermEnum
- */
+ *
NOTE: Currently this query uses {@link + * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which + * assigns not-very-useful scores to the resulting hits. In + * 3.0 this default will change to {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you can use {@link + * MultiTermQuery#setRewriteMethod} to change it. + * + * @see WildcardTermEnum */ public class WildcardQuery extends MultiTermQuery { private boolean termContainsWildcard; protected Term term; Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 795767) +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -53,6 +53,7 @@ import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.Hits; @@ -548,7 +549,7 @@ numHighlights = 0; query = new WildcardQuery(new Term(FIELD_NAME, "ken*")); - ((WildcardQuery)query).setConstantScoreRewrite(true); + ((WildcardQuery)query).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); searcher = new IndexSearcher(ramDir); // can't rewrite ConstantScore if you want to highlight it - // it rewrites to ConstantScoreQuery which cannot be highlighted @@ -1186,7 +1187,7 @@ searchers[1] = new IndexSearcher(ramDir2); MultiSearcher multiSearcher = new MultiSearcher(searchers); QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); - parser.setConstantScoreRewrite(false); + parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = parser.parse("multi*"); System.out.println("Searching for: " + 
query.toString(FIELD_NAME)); // at this point the multisearcher calls combine(query[]) @@ -1487,7 +1488,7 @@ public void doSearching(String queryString) throws Exception { QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); - parser.setConstantScoreRewrite(false); + parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = parser.parse(queryString); doSearching(query); } Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 795767) +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy) @@ -28,14 +28,11 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FuzzyQuery; @@ -139,9 +136,9 @@ terms.putAll(disjunctTerms); } else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) { MultiTermQuery mtq = ((MultiTermQuery)query); - if(mtq.getConstantScoreRewrite()) { + if(mtq.getRewriteMethod() == MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE) { mtq = copyMultiTermQuery(mtq); - mtq.setConstantScoreRewrite(false); + mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = mtq; } String field;