Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 795767)
+++ CHANGES.txt (working copy)
@@ -66,11 +66,12 @@
Changes in runtime behavior
- 1. LUCENE-1424: QueryParser now by default uses constant score query
+ 1. LUCENE-1424: QueryParser now by default uses constant score auto
rewriting when it generates a WildcardQuery and PrefixQuery (it
- already does so for RangeQuery, as well). Call
- setConstantScoreRewrite(false) to revert to BooleanQuery rewriting
- method. (Mark Miller via Mike McCandless)
+ already does so for TermRangeQuery, as well). Call
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
+ to revert to the slower BooleanQuery rewriting method. (Mark Miller via Mike
+ McCandless)
2. LUCENE-1575: As of 2.9, the core collectors as well as
IndexSearcher's search methods that return top N results, no
@@ -296,10 +297,10 @@
includes more detailed status than previously. (Tim Smith via
Mike McCandless)
-28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed
- to TermRangeQuery and TermRangeFilter. TermRangeQuery is in
- constant score rewrite mode by default. The new classes also have
- new ctors taking field and term ranges as Strings (see also
+28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed to
+ TermRangeQuery and TermRangeFilter. TermRangeQuery is in constant
+ score auto rewrite mode by default. The new classes also have new
+ ctors taking field and term ranges as Strings (see also
LUCENE-1424). (Uwe Schindler)
29. LUCENE-1609: The termInfosIndexDivisor must now be specified
@@ -434,7 +435,7 @@
6. LUCENE-1424: Moved constant score query rewrite capability into
MultiTermQuery, allowing TermRangeQuery, PrefixQuery and WildcardQuery
to switch betwen constant-score rewriting or BooleanQuery
- expansion rewriting via a new setConstantScoreRewrite method.
+ expansion rewriting via a new setRewriteMethod method.
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
McCandless)
@@ -579,6 +580,16 @@
On 32 bit platforms, the address space can be very fragmented, so
one big ByteBuffer for the whole file may not fit into address space.
(Eks Dev via Uwe Schindler)
+
+33. LUCENE-1644: Enable 4 rewrite modes for queries deriving from
+ MultiTermQuery (WildcardQuery, PrefixQuery, TermRangeQuery,
+ NumericRangeQuery): CONSTANT_SCORE_FILTER_REWRITE first creates a
+ filter and then assigns constant score (boost) to docs;
+ CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE creates a BooleanQuery but
+ uses a constant score (boost); SCORING_BOOLEAN_QUERY_REWRITE also
+ creates a BooleanQuery but keeps the BooleanQuery's scores;
+ CONSTANT_SCORE_AUTO_REWRITE_DEFAULT tries to pick the most performant
+ constant-score rewrite method. (Mike McCandless)
Optimizations
Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 795767)
+++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -46,6 +46,7 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -433,11 +434,11 @@
public void testRange() throws Exception {
assertQueryEquals("[ a TO z]", null, "[a TO z]");
- assertTrue(((TermRangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite());
+ assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
- qp.setConstantScoreRewrite(false);
- assertFalse(((TermRangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite());
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());
assertQueryEquals("[ a TO z ]", null, "[a TO z]");
assertQueryEquals("{ a TO z}", null, "{a TO z}");
@@ -476,7 +477,7 @@
// supported).
// Test ConstantScoreRangeQuery
- qp.setConstantScoreRewrite(true);
+ qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
@@ -484,7 +485,7 @@
assertEquals("The index Term should be included.", 1, result.length);
// Test TermRangeQuery
- qp.setConstantScoreRewrite(false);
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
Index: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
===================================================================
--- src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 795767)
+++ src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy)
@@ -88,29 +88,35 @@
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
+ public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
+ TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
+ query.setRewriteMethod(method);
+ return query;
+ }
+
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il,
boolean ih, Collator c) {
TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/** macro for readability */
public static Query cspq(Term prefix) {
PrefixQuery query = new PrefixQuery(prefix);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
/** macro for readability */
public static Query cswcq(Term wild) {
WildcardQuery query = new WildcardQuery(wild);
- query.setConstantScoreRewrite(true);
+ query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
@@ -156,6 +162,14 @@
result[i].score);
}
+ result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).scoreDocs;
+ numHits = result.length;
+ assertEquals("wrong number of results", 6, numHits);
+ for (int i = 0; i < numHits; i++) {
+ assertEquals("score for " + i + " was not the same", score,
+ result[i].score);
+ }
+
}
public void testBoost() throws IOException {
@@ -201,6 +215,18 @@
assertEquals(0, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
+ q1 = csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0
+ q1.setBoost(.1f);
+ q2 = csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1
+ bq = new BooleanQuery(true);
+ bq.add(q1, BooleanClause.Occur.SHOULD);
+ bq.add(q2, BooleanClause.Occur.SHOULD);
+
+ hits = search.search(bq, null, 1000).scoreDocs;
+ assertEquals(1, hits[0].doc);
+ assertEquals(0, hits[1].doc);
+ assertTrue(hits[0].score > hits[1].score);
+
q1 = csrq("data", "A", "A", T, T); // matches document #0
q1.setBoost(10f);
q2 = csrq("data", "Z", "Z", T, T); // matches document #1
@@ -268,21 +294,39 @@
result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
+ result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("find all", numDocs, result.length);
+
result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
+ result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("all but last", numDocs - 1, result.length);
+
result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
+ result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("all but first", numDocs - 1, result.length);
+
result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
+ result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("all but ends", numDocs - 2, result.length);
+
result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
+ result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("med and up", 1 + maxId - medId, result.length);
+
result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
+ result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("up to med", 1 + medId - minId, result.length);
+
// unbounded id
result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs;
@@ -307,24 +351,51 @@
result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
+
+ result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("min,min,F,F", 0, result.length);
+
result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
+
+ result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("med,med,F,F", 0, result.length);
+
result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
+ result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("max,max,F,F", 0, result.length);
+
result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
+
+ result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("min,min,T,T", 1, result.length);
+
result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
+ result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("nul,min,F,T", 1, result.length);
+
result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
+
+ result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("max,max,T,T", 1, result.length);
+
result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
+ result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("max,nul,T,T", 1, result.length);
+
result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
+ result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+ assertEquals("med,med,T,T", 1, result.length);
}
public void testRangeQueryIdCollating() throws IOException {
Index: src/test/org/apache/lucene/search/JustCompileSearch.java
===================================================================
--- src/test/org/apache/lucene/search/JustCompileSearch.java (revision 795767)
+++ src/test/org/apache/lucene/search/JustCompileSearch.java (working copy)
@@ -292,7 +292,7 @@
static final class JustCompileMultiTermQuery extends MultiTermQuery {
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ public FilteredTermEnum getEnum(IndexReader reader) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
Index: src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
===================================================================
--- src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 795767)
+++ src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy)
@@ -25,7 +25,6 @@
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
-import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
@@ -104,13 +103,13 @@
switch (i) {
case 0:
type = " (constant score)";
- q.setConstantScoreRewrite(true);
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
- q.setConstantScoreRewrite(false);
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
@@ -229,7 +228,7 @@
// test inclusive range
NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, true);
TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -238,7 +237,7 @@
// test exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, false);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -247,7 +246,7 @@
// test left exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, true);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -256,7 +255,7 @@
// test right exclusive range
tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, false);
cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
Index: src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
===================================================================
--- src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 795767)
+++ src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy)
@@ -25,7 +25,6 @@
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
-import org.apache.lucene.search.SortField;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
@@ -108,13 +107,13 @@
switch (i) {
case 0:
type = " (constant score)";
- q.setConstantScoreRewrite(true);
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
case 1:
type = " (boolean query)";
- q.setConstantScoreRewrite(false);
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER);
terms = q.getTotalNumberOfTerms();
break;
@@ -245,7 +244,7 @@
// test inclusive range
NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, true);
TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
TopDocs tTopDocs = searcher.search(tq, 1);
TopDocs cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -254,7 +253,7 @@
// test exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, false);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -263,7 +262,7 @@
// test left exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, true);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
@@ -272,7 +271,7 @@
// test right exclusive range
tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, false);
cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, false);
- cq.setConstantScoreRewrite(true);
+ cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
tTopDocs = searcher.search(tq, 1);
cTopDocs = searcher.search(cq, 1);
assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits );
Index: src/java/org/apache/lucene/queryParser/Token.java
===================================================================
--- src/java/org/apache/lucene/queryParser/Token.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/Token.java (working copy)
@@ -121,4 +121,4 @@
}
}
-/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
+/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/TokenMgrError.java
===================================================================
--- src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy)
@@ -137,4 +137,4 @@
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */
+/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -25,6 +25,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -118,7 +119,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
- boolean constantScoreRewrite= true;
+ MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@@ -331,40 +332,48 @@
}
/**
- * @deprecated Please use {@link #setConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #setMultiTermRewriteMethod} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
- constantScoreRewrite = !useOldRangeQuery;
+ if (useOldRangeQuery) {
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ } else {
+ setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+ }
}
/**
- * @deprecated Please use {@link #getConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #getMultiTermRewriteMethod} instead.
*/
public boolean getUseOldRangeQuery() {
- return !constantScoreRewrite;
+ if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
+ return true;
+ } else {
+ return false;
+ }
}
/**
- * By default QueryParser uses constant-score rewriting
+ * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
- * points are not relevant then set this option to true
- * Default is false.
+ * points are not relevant then use this to change
+ * the rewrite method.
*/
- public void setConstantScoreRewrite(boolean v) {
- constantScoreRewrite = v;
+ public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
+ multiTermRewriteMethod = method;
}
/**
- * @see #setConstantScoreRewrite(boolean)
+ * @see #setMultiTermRewriteMethod
*/
- public boolean getConstantScoreRewrite() {
- return constantScoreRewrite;
+ public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
+ return multiTermRewriteMethod;
}
/**
@@ -858,7 +867,7 @@
*/
protected Query newPrefixQuery(Term prefix){
PrefixQuery query = new PrefixQuery(prefix);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -884,7 +893,7 @@
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -903,7 +912,7 @@
*/
protected Query newWildcardQuery(Term t) {
WildcardQuery query = new WildcardQuery(t);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 795767)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -49,6 +49,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@@ -142,7 +143,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
- boolean constantScoreRewrite= true;
+ MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@@ -355,40 +356,48 @@
}
/**
- * @deprecated Please use {@link #setConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #setMultiTermRewriteMethod} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
- constantScoreRewrite = !useOldRangeQuery;
+ if (useOldRangeQuery) {
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ } else {
+ setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+ }
}
/**
- * @deprecated Please use {@link #getConstantScoreRewrite} instead.
+ * @deprecated Please use {@link #getMultiTermRewriteMethod} instead.
*/
public boolean getUseOldRangeQuery() {
- return !constantScoreRewrite;
+ if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
+ return true;
+ } else {
+ return false;
+ }
}
/**
- * By default QueryParser uses constant-score rewriting
+ * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
- * points are not relevant then set this option to true
- * Default is false.
+ * points are not relevant then use this to change
+ * the rewrite method.
*/
- public void setConstantScoreRewrite(boolean v) {
- constantScoreRewrite = v;
+ public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
+ multiTermRewriteMethod = method;
}
/**
- * @see #setConstantScoreRewrite(boolean)
+ * @see #setMultiTermRewriteMethod
*/
- public boolean getConstantScoreRewrite() {
- return constantScoreRewrite;
+ public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
+ return multiTermRewriteMethod;
}
/**
@@ -882,7 +891,7 @@
*/
protected Query newPrefixQuery(Term prefix){
PrefixQuery query = new PrefixQuery(prefix);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -908,7 +917,7 @@
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
@@ -927,7 +936,7 @@
*/
protected Query newWildcardQuery(Term t) {
WildcardQuery query = new WildcardQuery(t);
- query.setConstantScoreRewrite(constantScoreRewrite);
+ query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
Index: src/java/org/apache/lucene/queryParser/CharStream.java
===================================================================
--- src/java/org/apache/lucene/queryParser/CharStream.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy)
@@ -109,4 +109,4 @@
void Done();
}
-/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
+/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/ParseException.java
===================================================================
--- src/java/org/apache/lucene/queryParser/ParseException.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/ParseException.java (working copy)
@@ -195,4 +195,4 @@
}
}
-/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
+/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java (working copy)
@@ -29,6 +29,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@@ -77,9 +78,9 @@
public Query parse(String query) throws ParseException {
if (isPass2ResolvingPhrases) {
- boolean oldConstantScoreRewriteSetting = getConstantScoreRewrite();
+ MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod();
try {
- // Temporarily set constantScoreRewrite to false so that Parser will
+ // Temporarily force BooleanQuery rewrite so that Parser will
// generate visible
// collection of terms which we can convert into SpanQueries.
// ConstantScoreRewrite mode produces an
@@ -88,10 +89,10 @@
// QueryParser is not guaranteed threadsafe anyway so this temporary
// state change should not
// present an issue
- setConstantScoreRewrite(false);
+ setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return super.parse(query);
} finally {
- setConstantScoreRewrite(oldConstantScoreRewriteSetting);
+ setMultiTermRewriteMethod(oldMethod);
}
}
@@ -165,7 +166,7 @@
// that can be turned into SpanOr clause
TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive,
getRangeCollator());
- rangeQuery.setConstantScoreRewrite(false);;
+ rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return rangeQuery;
}
return super.newRangeQuery(field, part1, part2, inclusive);
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 795767)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
Index: src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -18,6 +18,10 @@
*/
import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -34,24 +38,243 @@
* FilteredTermEnum} that iterates through the terms to be
* matched.
*
- * <p>NOTE: if {@link #setConstantScoreRewrite} is
- * false, you may encounter a {@link
- * BooleanQuery.TooManyClauses} exception during searching,
- * which happens when the number of terms to be searched
- * exceeds {@link BooleanQuery#getMaxClauseCount()}.
- * Setting {@link #setConstantScoreRewrite} to false
+ * <p>NOTE: if {@link #setRewriteMethod} is either
+ * {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link
+ * #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a
+ * {@link BooleanQuery.TooManyClauses} exception during
+ * searching, which happens when the number of terms to be
+ * searched exceeds {@link
+ * BooleanQuery#getMaxClauseCount()}. Setting {@link
+ * #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE}
* prevents this.
*
+ * <p>The recommended rewrite method is {@link
+ * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU
+ * computing unhelpful scores, and it tries to pick the most
+ * performant rewrite method given the query.
+ *
* Note that {@link QueryParser} by default produces
- * MultiTermQueries with {@link #setConstantScoreRewrite}
- * true.
+ * MultiTermQueries with {@link #setRewriteMethod}
+ * {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}.
*/
public abstract class MultiTermQuery extends Query {
/* @deprecated move to sub class */
protected Term term;
- protected boolean constantScoreRewrite = false;
+ protected RewriteMethod rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
transient int numberOfTerms = 0;
+ public static abstract class RewriteMethod implements Serializable {
+ public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
+ }
+
+ private static final class ConstantScoreFilterRewrite extends RewriteMethod implements Serializable {
+ public Query rewrite(IndexReader reader, MultiTermQuery query) {
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
+ result.setBoost(query.getBoost());
+ return result;
+ }
+ }
+
+ /** A rewrite method that first creates a private Filter,
+ * by visiting each term in sequence and marking all docs
+ * for that term. Matching documents are assigned a
+ * constant score equal to the query's boost.
+ *
+ * <p>This method is faster than the BooleanQuery
+ * rewrite methods when the number of matched terms or
+ * matched documents is non-trivial. Also, it will never
+ * hit an errant {@link BooleanQuery.TooManyClauses}
+ * exception.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite();
+
+ private static class ScoringBooleanQueryRewrite extends RewriteMethod implements Serializable {
+ public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+
+ FilteredTermEnum enumerator = query.getEnum(reader);
+ BooleanQuery result = new BooleanQuery(true);
+ int count = 0;
+ try {
+ do {
+ Term t = enumerator.term();
+ if (t != null) {
+ count++;
+ TermQuery tq = new TermQuery(t); // found a match
+ tq.setBoost(query.getBoost() * enumerator.difference()); // set the boost
+ result.add(tq, BooleanClause.Occur.SHOULD); // add to query
+ }
+ } while (enumerator.next());
+ } finally {
+ enumerator.close();
+ }
+ query.incTotalNumberOfTerms(count);
+ return result;
+ }
+ }
+
+ /** A rewrite method that first translates each term into
+ * a {@link BooleanClause.Occur#SHOULD} clause in a
+ * BooleanQuery, and keeps the scores as computed by the
+ * query. Note that typically such scores are useless to
+ * the application, and require substantial CPU to
+ * compute, so it's almost always better to use {@link
+ * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. This rewrite
+ * method will hit {@link BooleanQuery.TooManyClauses} if
+ * the number of terms exceeds {@link
+ * BooleanQuery#getMaxClauseCount}.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite();
+
+ private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable {
+ public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+ // strip the scores off
+ Query result = new ConstantScoreQuery(new QueryWrapperFilter(super.rewrite(reader, query)));
+ result.setBoost(query.getBoost());
+ return result;
+ }
+ }
+
+ /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except
+ * scores are not computed. Instead, each matching
+ * document receives a constant score equal to the
+ * query's boost. This rewrite method can hit {@link
+ * BooleanQuery.TooManyClauses} if the number of terms
+ * exceeds {@link BooleanQuery#getMaxClauseCount}.
+ *
+ * @see #setRewriteMethod */
+ public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite();
+
+
+ /** A rewrite method that tries to pick the best
+ * constant-score rewrite method based on term and
+ * document counts from the query. If both the number of
+ * terms and the number of documents are small enough, then {@link
+ * #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used.
+ * Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is
+ * used.
+ */
+ public final static class ConstantScoreAutoRewrite extends RewriteMethod implements Serializable {
+
+ public static int DEFAULT_TERM_COUNT_CUTOFF = 10000;
+ public static int DEFAULT_DOC_COUNT_PERCENT = 1;
+
+ private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
+ private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
+
+ /** If the number of terms in this query is equal to or
+ * larger than this setting then {@link
+ * #CONSTANT_SCORE_FILTER_REWRITE} is used. */
+ public void setTermCountCutoff(int count) {
+ termCountCutoff = count;
+ }
+
+ /** @see #setTermCountCutoff */
+ public int getTermCountCutoff() {
+ return termCountCutoff;
+ }
+
+ /** If the number of documents to be visited in the
+ * postings exceeds this specified percentage of the
+ * maxDoc() for the index, then {@link
+ * #CONSTANT_SCORE_FILTER_REWRITE} is used.
+ * @param percent 0.0 to 100.0 */
+ public void setDocCountPercent(double percent) {
+ docCountPercent = percent;
+ }
+
+ /** @see #setDocCountPercent */
+ public double getDocCountPercent() {
+ return docCountPercent;
+ }
+
+ public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+ // Get the enum and start visiting terms. If we
+ // exhaust the enum before hitting either of the
+ // cutoffs, we use CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; else,
+ // CONSTANT_SCORE_FILTER_REWRITE:
+ final Collection pendingTerms = new ArrayList();
+ final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
+ final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
+ int docVisitCount = 0;
+
+ FilteredTermEnum enumerator = query.getEnum(reader);
+ try {
+ while(true) {
+ Term t = enumerator.term();
+ if (t != null) {
+ pendingTerms.add(t);
+ // Loading the TermInfo from the terms dict here
+ // should not be costly, because 1) the
+ // query/filter will load the TermInfo when it
+ // runs, and 2) the terms dict has a cache:
+ docVisitCount += reader.docFreq(t);
+ }
+
+ if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
+ // Too many terms or docs -- make a filter.
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
+ result.setBoost(query.getBoost());
+ return result;
+ } else if (!enumerator.next()) {
+ // Enumeration is done, and we hit a small
+ // enough number of terms & docs -- just make a
+ // BooleanQuery, now
+ Iterator it = pendingTerms.iterator();
+ BooleanQuery bq = new BooleanQuery(true);
+ while(it.hasNext()) {
+ TermQuery tq = new TermQuery((Term) it.next());
+ bq.add(tq, BooleanClause.Occur.SHOULD);
+ }
+ // Strip scores
+ Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+ result.setBoost(query.getBoost());
+ return result;
+ }
+ }
+ } finally {
+ enumerator.close();
+ }
+ }
+
+ public int hashCode() {
+ final int prime = 1279;
+ return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
+ }
+
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+
+ ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
+ if (other.termCountCutoff != termCountCutoff) {
+ return false;
+ }
+
+ if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
+ return false;
+ }
+
+ return true;
+ }
+ }
+
+ /** Default instance of {@link ConstantScoreAutoRewrite},
+ * with {@link
+ * ConstantScoreAutoRewrite#setTermCountCutoff} set to
+ * {@link
+ * ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
+ * and {@link
+ * ConstantScoreAutoRewrite#setDocCountPercent} set to
+ * {@link
+ * ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}*/
+ public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite();
+
/** Constructs a query for terms matching This query uses {@linkplain
+ * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}. If you
+ * want to change this, use the new {@link TermRangeQuery}
+ * instead.
*
* @deprecated Use {@link TermRangeQuery} for term ranges or
* {@link NumericRangeQuery} for numeric ranges instead.
@@ -93,7 +94,7 @@
inclusive, inclusive,
collator
);
- delegate.setConstantScoreRewrite(false);
+ delegate.setRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE);
}
public void setBoost(float b) {
Index: src/java/org/apache/lucene/search/PrefixQuery.java
===================================================================
--- src/java/org/apache/lucene/search/PrefixQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy)
@@ -24,7 +24,15 @@
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
- * is built by QueryParser for input like NOTE: Currently this query uses {@link
+ * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which
+ * assigns not-very-useful scores to the resulting hits. In
+ * 3.0 this default will change to {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you
+ * can use {@link MultiTermQuery#setRewriteMethod} to change
+ * it. */
public class PrefixQuery extends MultiTermQuery {
private Term prefix;
@@ -37,7 +45,7 @@
/** Returns the prefix of this query. */
public Term getPrefix() { return prefix; }
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ public FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new PrefixTermEnum(reader, prefix);
}
Index: src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java
===================================================================
--- src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (working copy)
@@ -29,9 +29,8 @@
* supplied range according to {@link String#compareTo(String)}. It is not intended
* for numerical ranges, use {@link NumericRangeQuery} instead.
*
- * This query is in
- * {@linkplain MultiTermQuery#setConstantScoreRewrite(boolean) constant score rewrite mode}.
- * If you want to change this, use the new {@link TermRangeQuery} instead.
+ * This query is hardwired to {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}.
+ * If you want to change this, use {@link TermRangeQuery} instead.
*
* @deprecated Use {@link TermRangeQuery} for term ranges or
* {@link NumericRangeQuery} for numeric ranges instead.
@@ -44,14 +43,14 @@
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
{
super(fieldName, lowerVal, upperVal, includeLower, includeUpper);
- this.constantScoreRewrite = true;
+ rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
public ConstantScoreRangeQuery(String fieldName, String lowerVal,
String upperVal, boolean includeLower,
boolean includeUpper, Collator collator) {
super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator);
- this.constantScoreRewrite = true;
+ rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
public String getLowerVal() {
@@ -63,8 +62,9 @@
}
/** Changes of mode are not supported by this class (fixed to constant score rewrite mode) */
- public void setConstantScoreRewrite(boolean constantScoreRewrite) {
- if (!constantScoreRewrite)
- throw new UnsupportedOperationException("Use TermRangeQuery instead to enable boolean query rewrite.");
+ public void setRewriteMethod(RewriteMethod method) {
+ if (method != CONSTANT_SCORE_AUTO_REWRITE_DEFAULT) { // only the method both constructors hardwire is accepted (as a no-op); anything else is rejected
+ throw new UnsupportedOperationException("Use TermRangeQuery instead to change the rewrite method.");
+ }
+ }
}
Index: src/java/org/apache/lucene/search/NumericRangeQuery.java
===================================================================
--- src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy)
@@ -123,12 +123,12 @@
*
* Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed
* that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count)
- * took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs
+ * took about 30-40 secs to complete, {@link TermRangeQuery} in filter rewrite mode took 5 secs
* and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit
* precision step). This query type was developed for a geographic portal, where the performance for
* e.g. bounding boxes or exact date/time stamps is important.
*
- * The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}.
+ * The query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
* With precision steps of ≤4, this query can be run in conventional {@link BooleanQuery}
* rewrite mode without changing the max clause count.
*
@@ -152,7 +152,7 @@
this.max = max;
this.minInclusive = minInclusive;
this.maxInclusive = maxInclusive;
- setConstantScoreRewrite(true);
+ setRewriteMethod(CONSTANT_SCORE_FILTER_REWRITE);
}
/**
@@ -260,7 +260,7 @@
}
//@Override
- protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
+ public FilteredTermEnum getEnum(final IndexReader reader) throws IOException {
return new NumericRangeTermEnum(reader);
}
Index: src/java/org/apache/lucene/search/TermRangeQuery.java
===================================================================
--- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy)
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.text.Collator;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
@@ -31,9 +30,9 @@
* supplied range according to {@link String#compareTo(String)}. It is not intended
* for numerical ranges, use {@link NumericRangeQuery} instead.
*
- * This query is in constant score mode per default.
- * See {@link MultiTermQuery#setConstantScoreRewrite} for the tradeoffs between
- * enabling and disabling constantScoreRewrite mode.
+ * This query uses the {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * rewrite method.
* @since 2.9
*/
@@ -110,7 +109,7 @@
this.includeLower = includeLower;
this.includeUpper = includeUpper;
this.collator = collator;
- this.constantScoreRewrite = true;
+ rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
}
/** Returns the field name for this query */
@@ -131,7 +130,7 @@
/** Returns the collator used to determine range inclusion, if any. */
public Collator getCollator() { return collator; }
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ public FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new TermRangeTermEnum(reader, field, lowerTerm,
upperTerm, includeLower, includeUpper, collator);
}
Index: src/java/org/apache/lucene/search/WildcardQuery.java
===================================================================
--- src/java/org/apache/lucene/search/WildcardQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/WildcardQuery.java (working copy)
@@ -30,8 +30,14 @@
* a Wildcard term should not start with one of the wildcards NOTE: Currently this query uses {@link
+ * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which
+ * assigns not-very-useful scores to the resulting hits. In
+ * 3.0 this default will change to {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you can use {@link
+ * MultiTermQuery#setRewriteMethod} to change it.
+ *
+ * @see WildcardTermEnum */
public class WildcardQuery extends MultiTermQuery {
private boolean termContainsWildcard;
protected Term term;
@@ -42,7 +48,7 @@
this.termContainsWildcard = (term.text().indexOf('*') != -1) || (term.text().indexOf('?') != -1);
}
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ public FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new WildcardTermEnum(reader, getTerm());
}
Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
===================================================================
--- contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 795767)
+++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy)
@@ -28,14 +28,11 @@
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.FuzzyQuery;
@@ -139,9 +136,9 @@
terms.putAll(disjunctTerms);
} else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) {
MultiTermQuery mtq = ((MultiTermQuery)query);
- if(mtq.getConstantScoreRewrite()) {
+ if(mtq.getRewriteMethod() == MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE) {
mtq = copyMultiTermQuery(mtq);
- mtq.setConstantScoreRewrite(false);
+ mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = mtq;
}
String field;
term. */
public MultiTermQuery(Term term) {
this.term = term;
@@ -105,33 +328,12 @@
numberOfTerms = 0;
}
- protected Filter getFilter() {
- return new MultiTermQueryWrapperFilter(this);
+ public void incTotalNumberOfTerms(int inc) {
+ numberOfTerms += inc;
}
public Query rewrite(IndexReader reader) throws IOException {
- if (!constantScoreRewrite) {
- FilteredTermEnum enumerator = getEnum(reader);
- BooleanQuery query = new BooleanQuery(true);
- try {
- do {
- Term t = enumerator.term();
- if (t != null) {
- numberOfTerms++;
- TermQuery tq = new TermQuery(t); // found a match
- tq.setBoost(getBoost() * enumerator.difference()); // set the boost
- query.add(tq, BooleanClause.Occur.SHOULD); // add to query
- }
- } while (enumerator.next());
- } finally {
- enumerator.close();
- }
- return query;
- } else {
- Query query = new ConstantScoreQuery(getFilter());
- query.setBoost(getBoost());
- return query;
- }
+ return rewriteMethod.rewrite(reader, this);
}
@@ -155,10 +357,10 @@
}
/**
- * @see #setConstantScoreRewrite
+ * @see #setRewriteMethod
*/
- public boolean getConstantScoreRewrite() {
- return constantScoreRewrite;
+ public RewriteMethod getRewriteMethod() {
+ return rewriteMethod;
}
/**
@@ -181,8 +383,8 @@
* query's boost.
*
*/
- public void setConstantScoreRewrite(boolean constantScoreRewrite) {
- this.constantScoreRewrite = constantScoreRewrite;
+ public void setRewriteMethod(RewriteMethod method) {
+ rewriteMethod = method;
}
//@Override
@@ -190,7 +392,8 @@
final int prime = 31;
int result = 1;
result = prime * result + Float.floatToIntBits(getBoost());
- result = prime * result + (constantScoreRewrite ? 1231 : 1237);
+ result = prime * result;
+ result += rewriteMethod.hashCode();
return result;
}
@@ -205,8 +408,9 @@
MultiTermQuery other = (MultiTermQuery) obj;
if (Float.floatToIntBits(getBoost()) != Float.floatToIntBits(other.getBoost()))
return false;
- if (constantScoreRewrite != other.constantScoreRewrite)
+ if (!rewriteMethod.equals(other.rewriteMethod)) {
return false;
+ }
return true;
}
Index: src/java/org/apache/lucene/search/FuzzyQuery.java
===================================================================
--- src/java/org/apache/lucene/search/FuzzyQuery.java (revision 795767)
+++ src/java/org/apache/lucene/search/FuzzyQuery.java (working copy)
@@ -71,6 +71,7 @@
this.minimumSimilarity = minimumSimilarity;
this.prefixLength = prefixLength;
+ rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE;
}
/**
@@ -104,7 +105,7 @@
return prefixLength;
}
- protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ public FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
}
@@ -115,8 +116,8 @@
return term;
}
- public void setConstantScoreRewrite(boolean constantScoreRewrite) {
- throw new UnsupportedOperationException("FuzzyQuery cannot rewrite to a constant score query");
+ public void setRewriteMethod(RewriteMethod method) {
+ throw new UnsupportedOperationException("FuzzyQuery cannot change rewrite method");
}
public Query rewrite(IndexReader reader) throws IOException {
Index: src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
===================================================================
--- src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 796761)
+++ src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy)
@@ -37,7 +37,8 @@
* For example, {@link TermRangeFilter} and {@link PrefixFilter} extend
* MultiTermQueryWrapperFilter.
* This class also provides the functionality behind
- * {@link MultiTermQuery#getFilter}, this is why it is not abstract.
+ * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE};
+ * this is why it is not abstract.
*/
public class MultiTermQueryWrapperFilter extends Filter {
@@ -96,6 +97,8 @@
abstract class TermGenerator {
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
+ final int[] docs = new int[32];
+ final int[] freqs = new int[32];
TermDocs termDocs = reader.termDocs();
try {
do {
@@ -104,8 +107,15 @@
break;
query.numberOfTerms++;
termDocs.seek(term);
- while (termDocs.next()) {
- handleDoc(termDocs.doc());
+ while (true) {
+ final int count = termDocs.read(docs, freqs);
+ if (count != 0) {
+ for(int i=0;iapp*. */
+ * is built by QueryParser for input like app*.
+ *
+ * * or
* ?.
*
- * @see WildcardTermEnum
- */
+ *