Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 795767) +++ CHANGES.txt (working copy) @@ -66,11 +66,12 @@ Changes in runtime behavior - 1. LUCENE-1424: QueryParser now by default uses constant score query + 1. LUCENE-1424: QueryParser now by default uses constant score auto rewriting when it generates a WildcardQuery and PrefixQuery (it - already does so for RangeQuery, as well). Call - setConstantScoreRewrite(false) to revert to BooleanQuery rewriting - method. (Mark Miller via Mike McCandless) + already does so for TermRangeQuery, as well). Call + setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) + to revert to slower BooleanQuery rewriting method. (Mark Miller via Mike + McCandless) 2. LUCENE-1575: As of 2.9, the core collectors as well as IndexSearcher's search methods that return top N results, no @@ -296,10 +297,10 @@ includes more detailed status than previously. (Tim Smith via Mike McCandless) -28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed - to TermRangeQuery and TermRangeFilter. TermRangeQuery is in - constant score rewrite mode by default. The new classes also have - new ctors taking field and term ranges as Strings (see also +28. LUCENE-1713: Deprecated RangeQuery and RangeFilter and renamed to + TermRangeQuery and TermRangeFilter. TermRangeQuery is in constant + score auto rewrite mode by default. The new classes also have new + ctors taking field and term ranges as Strings (see also LUCENE-1424). (Uwe Schindler) 29. LUCENE-1609: The termInfosIndexDivisor must now be specified @@ -434,7 +435,7 @@ 6. LUCENE-1424: Moved constant score query rewrite capability into MultiTermQuery, allowing TermRangeQuery, PrefixQuery and WildcardQuery to switch betwen constant-score rewriting or BooleanQuery - expansion rewriting via a new setConstantScoreRewrite method. + expansion rewriting via a new setRewriteMethod method. 
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike McCandless) @@ -579,6 +580,16 @@ On 32 bit platforms, the address space can be very fragmented, so one big ByteBuffer for the whole file may not fit into address space. (Eks Dev via Uwe Schindler) + +33. LUCENE-1644: Enable 4 rewrite modes for queries deriving from + MultiTermQuery (WildcardQuery, PrefixQuery, TermRangeQuery, + NumericRangeQuery): CONSTANT_SCORE_FILTER_REWRITE first creates a + filter and then assigns constant score (boost) to docs; + CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE creates a BooleanQuery but + uses a constant score (boost); SCORING_BOOLEAN_QUERY_REWRITE also + creates a BooleanQuery but keeps the BooleanQuery's scores; + CONSTANT_SCORE_AUTO_REWRITE_DEFAULT tries to pick the most performant + constant-score rewrite method. (Mike McCandless) Optimizations Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 795767) +++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -46,6 +46,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -433,11 +434,11 @@ public void testRange() throws Exception { assertQueryEquals("[ a TO z]", null, "[a TO z]"); - assertTrue(((TermRangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite()); + assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod()); QueryParser qp = new QueryParser("field", new SimpleAnalyzer()); - qp.setConstantScoreRewrite(false); - assertFalse(((TermRangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite()); + 
qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod()); assertQueryEquals("[ a TO z ]", null, "[a TO z]"); assertQueryEquals("{ a TO z}", null, "{a TO z}"); @@ -476,7 +477,7 @@ // supported). // Test ConstantScoreRangeQuery - qp.setConstantScoreRewrite(true); + qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); @@ -484,7 +485,7 @@ assertEquals("The index Term should be included.", 1, result.length); // Test TermRangeQuery - qp.setConstantScoreRewrite(false); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); Index: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 795767) +++ src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -88,29 +88,35 @@ /** macro for readability */ public static Query csrq(String f, String l, String h, boolean il, boolean ih) { TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); - query.setConstantScoreRewrite(true); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } + public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) { + TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + query.setRewriteMethod(method); + return query; + } + /** macro for readability */ public static Query csrq(String f, String l, String h, boolean il, boolean ih, 
Collator c) { TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); - query.setConstantScoreRewrite(true); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } /** macro for readability */ public static Query cspq(Term prefix) { PrefixQuery query = new PrefixQuery(prefix); - query.setConstantScoreRewrite(true); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } /** macro for readability */ public static Query cswcq(Term wild) { WildcardQuery query = new WildcardQuery(wild); - query.setConstantScoreRewrite(true); + query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } @@ -156,6 +162,14 @@ result[i].score); } + result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score); + } + } public void testBoost() throws IOException { @@ -201,6 +215,18 @@ assertEquals(0, hits[1].doc); assertTrue(hits[0].score > hits[1].score); + q1 = csrq("data", "A", "A", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #0 + q1.setBoost(.1f); + q2 = csrq("data", "Z", "Z", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); // matches document #1 + bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + hits = search.search(bq, null, 1000).scoreDocs; + assertEquals(1, hits[0].doc); + assertEquals(0, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + q1 = csrq("data", "A", "A", T, T); // matches document #0 q1.setBoost(10f); q2 = csrq("data", "Z", "Z", T, T); // matches document #1 @@ -268,21 +294,39 @@ result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs; assertEquals("find all", 
numDocs, result.length); + result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs; assertEquals("all but last", numDocs - 1, result.length); + result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs; assertEquals("all but first", numDocs - 1, result.length); + result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs; assertEquals("all but ends", numDocs - 2, result.length); + result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs; assertEquals("med and up", 1 + maxId - medId, result.length); + result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs; assertEquals("up to med", 1 + medId - minId, result.length); + result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + // unbounded id result = search.search(csrq("id", minIP, null, T, F), null, 
numDocs).scoreDocs; @@ -307,24 +351,51 @@ result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs; assertEquals("min,min,F,F", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs; assertEquals("med,med,F,F", 0, result.length); + + result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs; assertEquals("max,max,F,F", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs; assertEquals("min,min,T,T", 1, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs; assertEquals("nul,min,F,T", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs; assertEquals("max,max,T,T", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F), null, 
numDocs).scoreDocs; assertEquals("max,nul,T,T", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs; assertEquals("med,med,T,T", 1, result.length); + result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); } public void testRangeQueryIdCollating() throws IOException { Index: src/test/org/apache/lucene/search/JustCompileSearch.java =================================================================== --- src/test/org/apache/lucene/search/JustCompileSearch.java (revision 795767) +++ src/test/org/apache/lucene/search/JustCompileSearch.java (working copy) @@ -292,7 +292,7 @@ static final class JustCompileMultiTermQuery extends MultiTermQuery { - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + public FilteredTermEnum getEnum(IndexReader reader) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } Index: src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 795767) +++ src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -25,7 +25,6 @@ import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.search.SortField; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; @@ -104,13 +103,13 @@ switch (i) { case 0: type = " (constant score)"; - q.setConstantScoreRewrite(true); + 
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); terms = q.getTotalNumberOfTerms(); break; case 1: type = " (boolean query)"; - q.setConstantScoreRewrite(false); + q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); terms = q.getTotalNumberOfTerms(); break; @@ -229,7 +228,7 @@ // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, true); TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, true); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -238,7 +237,7 @@ // test exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, false); cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -247,7 +246,7 @@ // test left exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), false, true); cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); 
tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -256,7 +255,7 @@ // test right exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, new Integer(lower), new Integer(upper), true, false); cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 795767) +++ src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -25,7 +25,6 @@ import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; -import org.apache.lucene.search.SortField; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; @@ -108,13 +107,13 @@ switch (i) { case 0: type = " (constant score)"; - q.setConstantScoreRewrite(true); + q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); terms = q.getTotalNumberOfTerms(); break; case 1: type = " (boolean query)"; - q.setConstantScoreRewrite(false); + q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); topDocs = searcher.search(q, null, noDocs, Sort.INDEXORDER); terms = q.getTotalNumberOfTerms(); break; @@ -245,7 +244,7 @@ // test inclusive range 
NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, true); TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, true); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -254,7 +253,7 @@ // test exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, false); cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -263,7 +262,7 @@ // test left exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), false, true); cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -272,7 +271,7 @@ // test right exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, new Long(lower), new Long(upper), true, false); cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, 
false); - cq.setConstantScoreRewrite(true); + cq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: src/java/org/apache/lucene/queryParser/Token.java =================================================================== --- src/java/org/apache/lucene/queryParser/Token.java (revision 795767) +++ src/java/org/apache/lucene/queryParser/Token.java (working copy) @@ -121,4 +121,4 @@ } } -/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */ +/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/TokenMgrError.java =================================================================== --- src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 795767) +++ src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy) @@ -137,4 +137,4 @@ this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */ +/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 795767) +++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import 
org.apache.lucene.search.PhraseQuery; @@ -118,7 +119,7 @@ private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = true; - boolean constantScoreRewrite= true; + MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; boolean allowLeadingWildcard = false; boolean enablePositionIncrements = false; @@ -331,40 +332,48 @@ } /** - * @deprecated Please use {@link #setConstantScoreRewrite} instead. + * @deprecated Please use {@link #setMultiTermRewriteMethod} instead. */ public void setUseOldRangeQuery(boolean useOldRangeQuery) { - constantScoreRewrite = !useOldRangeQuery; + if (useOldRangeQuery) { + setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + } else { + setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + } } /** - * @deprecated Please use {@link #getConstantScoreRewrite} instead. + * @deprecated Please use {@link #getMultiTermRewriteMethod} instead. */ public boolean getUseOldRangeQuery() { - return !constantScoreRewrite; + if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { + return true; + } else { + return false; + } } /** - * By default QueryParser uses constant-score rewriting + * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it * a) Runs faster b) Does not have the scarcity of terms unduly influence score * c) avoids any "TooManyBooleanClauses" exception. * However, if your application really needs to use the * old-fashioned BooleanQuery expansion rewriting and the above - * points are not relevant then set this option to true - * Default is false. + * points are not relevant then use this to change + * the rewrite method. 
*/ - public void setConstantScoreRewrite(boolean v) { - constantScoreRewrite = v; + public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { + multiTermRewriteMethod = method; } /** - * @see #setConstantScoreRewrite(boolean) + * @see #setMultiTermRewriteMethod */ - public boolean getConstantScoreRewrite() { - return constantScoreRewrite; + public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { + return multiTermRewriteMethod; } /** @@ -858,7 +867,7 @@ */ protected Query newPrefixQuery(Term prefix){ PrefixQuery query = new PrefixQuery(prefix); - query.setConstantScoreRewrite(constantScoreRewrite); + query.setRewriteMethod(multiTermRewriteMethod); return query; } @@ -884,7 +893,7 @@ */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); - query.setConstantScoreRewrite(constantScoreRewrite); + query.setRewriteMethod(multiTermRewriteMethod); return query; } @@ -903,7 +912,7 @@ */ protected Query newWildcardQuery(Term t) { WildcardQuery query = new WildcardQuery(t); - query.setConstantScoreRewrite(constantScoreRewrite); + query.setRewriteMethod(multiTermRewriteMethod); return query; } Index: src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 795767) +++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -49,6 +49,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; @@ -142,7 +143,7 @@ private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = 
true; - boolean constantScoreRewrite= true; + MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; boolean allowLeadingWildcard = false; boolean enablePositionIncrements = false; @@ -355,40 +356,48 @@ } /** - * @deprecated Please use {@link #setConstantScoreRewrite} instead. + * @deprecated Please use {@link #setMultiTermRewriteMethod} instead. */ public void setUseOldRangeQuery(boolean useOldRangeQuery) { - constantScoreRewrite = !useOldRangeQuery; + if (useOldRangeQuery) { + setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + } else { + setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + } } /** - * @deprecated Please use {@link #getConstantScoreRewrite} instead. + * @deprecated Please use {@link #getMultiTermRewriteMethod} instead. */ public boolean getUseOldRangeQuery() { - return !constantScoreRewrite; + if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { + return true; + } else { + return false; + } } /** - * By default QueryParser uses constant-score rewriting + * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it * a) Runs faster b) Does not have the scarcity of terms unduly influence score * c) avoids any "TooManyBooleanClauses" exception. * However, if your application really needs to use the * old-fashioned BooleanQuery expansion rewriting and the above - * points are not relevant then set this option to true - * Default is false. + * points are not relevant then use this to change + * the rewrite method. 
*/ - public void setConstantScoreRewrite(boolean v) { - constantScoreRewrite = v; + public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { + multiTermRewriteMethod = method; } /** - * @see #setConstantScoreRewrite(boolean) + * @see #setMultiTermRewriteMethod */ - public boolean getConstantScoreRewrite() { - return constantScoreRewrite; + public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { + return multiTermRewriteMethod; } /** @@ -882,7 +891,7 @@ */ protected Query newPrefixQuery(Term prefix){ PrefixQuery query = new PrefixQuery(prefix); - query.setConstantScoreRewrite(constantScoreRewrite); + query.setRewriteMethod(multiTermRewriteMethod); return query; } @@ -908,7 +917,7 @@ */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); - query.setConstantScoreRewrite(constantScoreRewrite); + query.setRewriteMethod(multiTermRewriteMethod); return query; } @@ -927,7 +936,7 @@ */ protected Query newWildcardQuery(Term t) { WildcardQuery query = new WildcardQuery(t); - query.setConstantScoreRewrite(constantScoreRewrite); + query.setRewriteMethod(multiTermRewriteMethod); return query; } Index: src/java/org/apache/lucene/queryParser/CharStream.java =================================================================== --- src/java/org/apache/lucene/queryParser/CharStream.java (revision 795767) +++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy) @@ -109,4 +109,4 @@ void Done(); } -/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */ +/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/ParseException.java =================================================================== --- src/java/org/apache/lucene/queryParser/ParseException.java (revision 795767) +++ 
src/java/org/apache/lucene/queryParser/ParseException.java (working copy) @@ -195,4 +195,4 @@ } } -/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */ +/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java (revision 795767) +++ src/java/org/apache/lucene/queryParser/ComplexPhraseQueryParser.java (working copy) @@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNotQuery; @@ -77,9 +78,9 @@ public Query parse(String query) throws ParseException { if (isPass2ResolvingPhrases) { - boolean oldConstantScoreRewriteSetting = getConstantScoreRewrite(); + MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod(); try { - // Temporarily set constantScoreRewrite to false so that Parser will + // Temporarily force BooleanQuery rewrite so that Parser will // generate visible // collection of terms which we can convert into SpanQueries. 
// ConstantScoreRewrite mode produces an @@ -88,10 +89,10 @@ // QueryParser is not guaranteed threadsafe anyway so this temporary // state change should not // present an issue - setConstantScoreRewrite(false); + setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return super.parse(query); } finally { - setConstantScoreRewrite(oldConstantScoreRewriteSetting); + setMultiTermRewriteMethod(oldMethod); } } @@ -165,7 +166,7 @@ // that can be turned into SpanOr clause TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive, getRangeCollator()); - rangeQuery.setConstantScoreRewrite(false);; + rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return rangeQuery; } return super.newRangeQuery(field, part1, part2, inclusive); Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 795767) +++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -18,6 +18,10 @@ */ import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; 
@@ -34,24 +38,243 @@ * FilteredTermEnum} that iterates through the terms to be * matched. * - *

NOTE: if {@link #setConstantScoreRewrite} is - * false, you may encounter a {@link - * BooleanQuery.TooManyClauses} exception during searching, - * which happens when the number of terms to be searched - * exceeds {@link BooleanQuery#getMaxClauseCount()}. - * Setting {@link #setConstantScoreRewrite} to false + *

NOTE: if {@link #setRewriteMethod} is either + * {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link + * #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a + * {@link BooleanQuery.TooManyClauses} exception during + * searching, which happens when the number of terms to be + * searched exceeds {@link + * BooleanQuery#getMaxClauseCount()}. Setting {@link + * #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE} * prevents this. * + *

The recommended rewrite method is {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU + * computing unhelpful scores, and it tries to pick the most + * performant rewrite method given the query. + * * Note that {@link QueryParser} by default produces - * MultiTermQueries with {@link #setConstantScoreRewrite} - * true. + * MultiTermQueries with {@link #setRewriteMethod} + * {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}. */ public abstract class MultiTermQuery extends Query { /* @deprecated move to sub class */ protected Term term; - protected boolean constantScoreRewrite = false; + protected RewriteMethod rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE; transient int numberOfTerms = 0; + public static abstract class RewriteMethod implements Serializable { + public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; + } + + private static final class ConstantScoreFilterRewrite extends RewriteMethod implements Serializable { + public Query rewrite(IndexReader reader, MultiTermQuery query) { + Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.setBoost(query.getBoost()); + return result; + } + } + + /** A rewrite method that first creates a private Filter, + * by visiting each term in sequence and marking all docs + * for that term. Matching documents are assigned a + * constant score equal to the query's boost. + * + *

This method is faster than the BooleanQuery + * rewrite methods when the number of matched terms or + * matched documents is non-trivial. Also, it will never + * hit an errant {@link BooleanQuery.TooManyClauses} + * exception. + * + * @see #setRewriteMethod */ + public final static RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite(); + + private static class ScoringBooleanQueryRewrite extends RewriteMethod implements Serializable { + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + + FilteredTermEnum enumerator = query.getEnum(reader); + BooleanQuery result = new BooleanQuery(true); + int count = 0; + try { + do { + Term t = enumerator.term(); + if (t != null) { + count++; + TermQuery tq = new TermQuery(t); // found a match + tq.setBoost(query.getBoost() * enumerator.difference()); // set the boost + result.add(tq, BooleanClause.Occur.SHOULD); // add to query + } + } while (enumerator.next()); + } finally { + enumerator.close(); + } + query.incTotalNumberOfTerms(count); + return result; + } + } + + /** A rewrite method that first translates each term into + * {@link BooleanClause.Occur#SHOULD} clause in a + * BooleanQuery, and keeps the scores as computed by the + * query. Note that typically such scores are useless to + * the application, and require substantial CPU to + * compute, so it's almost always better to use {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. This rewrite + * method will hit {@link BooleanQuery.TooManyClauses} if + * the number of terms exceeds {@link + * BooleanQuery#getMaxClauseCount}. 
+ * + * @see #setRewriteMethod */ + public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite(); + + private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + // strip the scores off + Query result = new ConstantScoreQuery(new QueryWrapperFilter(super.rewrite(reader, query))); + result.setBoost(query.getBoost()); + return result; + } + } + + /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except + * scores are not computed. Instead, each matching + * document receives a constant score equal to the + * query's boost. This rewrite method can hit {@link + * BooleanQuery.TooManyClauses} if the number of terms + * exceeds {@link BooleanQuery#getMaxClauseCount}. + * + * @see #setRewriteMethod */ + public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite(); + + + /** A rewrite method that tries to pick the best + * constant-score rewrite method based on term and + * document counts from the query. If both the number of + * terms and documents is small enough, then {@link + * #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. + * Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is + * used. + */ + public final static class ConstantScoreAutoRewrite extends RewriteMethod implements Serializable { + + public static int DEFAULT_TERM_COUNT_CUTOFF = 10000; + public static int DEFAULT_DOC_COUNT_PERCENT = 1; + + private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; + private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT; + + /** If the number of terms in this query is equal to or + * larger than this setting then {@link + * #CONSTANT_SCORE_FILTER_REWRITE} is used. 
*/ + public void setTermCountCutoff(int count) { + termCountCutoff = count; + } + + /** @see #setTermCountCutoff */ + public int getTermCountCutoff() { + return termCountCutoff; + } + + /** If the number of documents to be visited in the + * postings exceeds this specified percentage of the + * maxDoc() for the index, then {@link + * #CONSTANT_SCORE_FILTER_REWRITE} is used. + * @param percent 0.0 to 100.0 */ + public void setDocCountPercent(double percent) { + docCountPercent = percent; + } + + /** @see #setDocCountPercent */ + public double getDocCountPercent() { + return docCountPercent; + } + + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + // Get the enum and start visiting terms. If we + // exhaust the enum before hitting either of the + // cutoffs, we use ConstantBooleanQueryRewrite; else, + // ConstantFilterRewrite: + final Collection pendingTerms = new ArrayList(); + final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc()); + final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff); + int docVisitCount = 0; + + FilteredTermEnum enumerator = query.getEnum(reader); + try { + while(true) { + Term t = enumerator.term(); + if (t != null) { + pendingTerms.add(t); + // Loading the TermInfo from the terms dict here + // should not be costly, because 1) the + // query/filter will load the TermInfo when it + // runs, and 2) the terms dict has a cache: + docVisitCount += reader.docFreq(t); + } + + if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { + // Too many terms -- make a filter. 
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.setBoost(query.getBoost()); + return result; + } else if (!enumerator.next()) { + // Enumeration is done, and we hit a small + // enough number of terms & docs -- just make a + // BooleanQuery, now + Iterator it = pendingTerms.iterator(); + BooleanQuery bq = new BooleanQuery(true); + while(it.hasNext()) { + TermQuery tq = new TermQuery((Term) it.next()); + bq.add(tq, BooleanClause.Occur.SHOULD); + } + // Strip scores + Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + result.setBoost(query.getBoost()); + return result; + } + } + } finally { + enumerator.close(); + } + } + + public int hashCode() { + final int prime = 1279; + return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent)); + } + + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + + ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj; + if (other.termCountCutoff != termCountCutoff) { + return false; + } + + if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) { + return false; + } + + return true; + } + } + + /** Default instance of {@link ConstantScoreAutoRewrite}, + * with {@link + * ConstantScoreAutoRewrite#setTermCountCutoff} set to + * {@link + * ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} + * and {@link + * ConstantScoreAutoRewrite#setDocCountPercent} set to + * {@link + * ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}*/ + public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite(); + /** Constructs a query for terms matching term. 
*/ public MultiTermQuery(Term term) { this.term = term; @@ -105,33 +328,12 @@ numberOfTerms = 0; } - protected Filter getFilter() { - return new MultiTermQueryWrapperFilter(this); + public void incTotalNumberOfTerms(int inc) { + numberOfTerms += inc; } public Query rewrite(IndexReader reader) throws IOException { - if (!constantScoreRewrite) { - FilteredTermEnum enumerator = getEnum(reader); - BooleanQuery query = new BooleanQuery(true); - try { - do { - Term t = enumerator.term(); - if (t != null) { - numberOfTerms++; - TermQuery tq = new TermQuery(t); // found a match - tq.setBoost(getBoost() * enumerator.difference()); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - } - } while (enumerator.next()); - } finally { - enumerator.close(); - } - return query; - } else { - Query query = new ConstantScoreQuery(getFilter()); - query.setBoost(getBoost()); - return query; - } + return rewriteMethod.rewrite(reader, this); } @@ -155,10 +357,10 @@ } /** - * @see #setConstantScoreRewrite + * @see #setRewriteMethod */ - public boolean getConstantScoreRewrite() { - return constantScoreRewrite; + public RewriteMethod getRewriteMethod() { + return rewriteMethod; } /** @@ -181,8 +383,8 @@ * query's boost. * */ - public void setConstantScoreRewrite(boolean constantScoreRewrite) { - this.constantScoreRewrite = constantScoreRewrite; + public void setRewriteMethod(RewriteMethod method) { + rewriteMethod = method; } //@Override @@ -190,7 +392,8 @@ final int prime = 31; int result = 1; result = prime * result + Float.floatToIntBits(getBoost()); - result = prime * result + (constantScoreRewrite ? 
1231 : 1237); + result = prime * result; + result += rewriteMethod.hashCode(); return result; } @@ -205,8 +408,9 @@ MultiTermQuery other = (MultiTermQuery) obj; if (Float.floatToIntBits(getBoost()) != Float.floatToIntBits(other.getBoost())) return false; - if (constantScoreRewrite != other.constantScoreRewrite) + if (!rewriteMethod.equals(other.rewriteMethod)) { return false; + } return true; } Index: src/java/org/apache/lucene/search/FuzzyQuery.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/FuzzyQuery.java (working copy) @@ -71,6 +71,7 @@ this.minimumSimilarity = minimumSimilarity; this.prefixLength = prefixLength; + rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE; } /** @@ -104,7 +105,7 @@ return prefixLength; } - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + public FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); } @@ -115,8 +116,8 @@ return term; } - public void setConstantScoreRewrite(boolean constantScoreRewrite) { - throw new UnsupportedOperationException("FuzzyQuery cannot rewrite to a constant score query"); + public void setRewriteMethod(RewriteMethod method) { + throw new UnsupportedOperationException("FuzzyQuery cannot change rewrite method"); } public Query rewrite(IndexReader reader) throws IOException { Index: src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 796761) +++ src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ -37,7 +37,8 @@ * For example, {@link TermRangeFilter} and {@link PrefixFilter} extend * MultiTermQueryWrapperFilter. 
* This class also provides the functionality behind - * {@link MultiTermQuery#getFilter}, this is why it is not abstract. + * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}; + * this is why it is not abstract. */ public class MultiTermQueryWrapperFilter extends Filter { @@ -96,6 +97,8 @@ abstract class TermGenerator { public void generate(IndexReader reader, TermEnum enumerator) throws IOException { + final int[] docs = new int[32]; + final int[] freqs = new int[32]; TermDocs termDocs = reader.termDocs(); try { do { @@ -104,8 +107,15 @@ break; query.numberOfTerms++; termDocs.seek(term); - while (termDocs.next()) { - handleDoc(termDocs.doc()); + while (true) { + final int count = termDocs.read(docs, freqs); + if (count != 0) { + for(int i=0;iThis query is in - * {@linkplain MultiTermQuery#setConstantScoreRewrite(boolean) boolean query rewrite mode}. - * If you want to change this, use the new {@link TermRangeQuery} instead. + *

This query uses {@linkplain + * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}. If you + * want to change this, use the new {@link TermRangeQuery} + * instead. * * @deprecated Use {@link TermRangeQuery} for term ranges or * {@link NumericRangeQuery} for numeric ranges instead. @@ -93,7 +94,7 @@ inclusive, inclusive, collator ); - delegate.setConstantScoreRewrite(false); + delegate.setRewriteMethod(TermRangeQuery.SCORING_BOOLEAN_QUERY_REWRITE); } public void setBoost(float b) { Index: src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- src/java/org/apache/lucene/search/PrefixQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -24,7 +24,15 @@ import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing terms with a specified prefix. A PrefixQuery - * is built by QueryParser for input like app*. */ + * is built by QueryParser for input like app*. + * + *

NOTE: Currently this query uses {@link + * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which + * assigns not-very-useful scores to the resulting hits. In + * 3.0 this default will change to {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you + * can use {@link MultiTermQuery#setRewriteMethod} to change + * it. */ public class PrefixQuery extends MultiTermQuery { private Term prefix; @@ -37,7 +45,7 @@ /** Returns the prefix of this query. */ public Term getPrefix() { return prefix; } - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + public FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new PrefixTermEnum(reader, prefix); } Index: src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (working copy) @@ -29,9 +29,8 @@ * supplied range according to {@link String#compareTo(String)}. It is not intended * for numerical ranges, use {@link NumericRangeQuery} instead. * - *

This query is in - * {@linkplain MultiTermQuery#setConstantScoreRewrite(boolean) constant score rewrite mode}. - * If you want to change this, use the new {@link TermRangeQuery} instead. + *

This query is hardwired to {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}. + * If you want to change this, use {@link TermRangeQuery} instead. * * @deprecated Use {@link TermRangeQuery} for term ranges or * {@link NumericRangeQuery} for numeric ranges instead. @@ -44,14 +43,14 @@ public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { super(fieldName, lowerVal, upperVal, includeLower, includeUpper); - this.constantScoreRewrite = true; + rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper, Collator collator) { super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator); - this.constantScoreRewrite = true; + rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } public String getLowerVal() { @@ -63,8 +62,9 @@ } /** Changes of mode are not supported by this class (fixed to constant score rewrite mode) */ - public void setConstantScoreRewrite(boolean constantScoreRewrite) { - if (!constantScoreRewrite) - throw new UnsupportedOperationException("Use TermRangeQuery instead to enable boolean query rewrite."); + public void setRewriteMethod(RewriteMethod method) { + if (method != CONSTANT_SCORE_FILTER_REWRITE) { + throw new UnsupportedOperationException("Use TermRangeQuery instead to change the rewrite method."); + } } } Index: src/java/org/apache/lucene/search/NumericRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy) @@ -123,12 +123,12 @@ * *

Comparisions of the different types of RangeQueries on an index with about 500,000 docs showed * that {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) - * took about 30-40 secs to complete, {@link TermRangeQuery} in constant score rewrite mode took 5 secs + * took about 30-40 secs to complete, {@link TermRangeQuery} in filter rewrite mode took 5 secs * and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit * precision step). This query type was developed for a geographic portal, where the performance for * e.g. bounding boxes or exact date/time stamps is important.

* - *

The query defaults to {@linkplain #setConstantScoreRewrite constant score rewrite mode}. + *

The query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}. * With precision steps of ≤4, this query can be run in conventional {@link BooleanQuery} * rewrite mode without changing the max clause count. * @@ -152,7 +152,7 @@ this.max = max; this.minInclusive = minInclusive; this.maxInclusive = maxInclusive; - setConstantScoreRewrite(true); + setRewriteMethod(CONSTANT_SCORE_FILTER_REWRITE); } /** @@ -260,7 +260,7 @@ } //@Override - protected FilteredTermEnum getEnum(final IndexReader reader) throws IOException { + public FilteredTermEnum getEnum(final IndexReader reader) throws IOException { return new NumericRangeTermEnum(reader); } Index: src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -20,7 +20,6 @@ import java.io.IOException; import java.text.Collator; -import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.ToStringUtils; @@ -31,9 +30,9 @@ * supplied range according to {@link String#compareTo(String)}. It is not intended * for numerical ranges, use {@link NumericRangeQuery} instead. * - *

This query is in constant score mode per default. - * See {@link MultiTermQuery#setConstantScoreRewrite} for the tradeoffs between - * enabling and disabling constantScoreRewrite mode. + *

This query uses the {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * rewrite method. * @since 2.9 */ @@ -110,7 +109,7 @@ this.includeLower = includeLower; this.includeUpper = includeUpper; this.collator = collator; - this.constantScoreRewrite = true; + rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } /** Returns the field name for this query */ @@ -131,7 +130,7 @@ /** Returns the collator used to determine range inclusion, if any. */ public Collator getCollator() { return collator; } - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + public FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new TermRangeTermEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); } Index: src/java/org/apache/lucene/search/WildcardQuery.java =================================================================== --- src/java/org/apache/lucene/search/WildcardQuery.java (revision 795767) +++ src/java/org/apache/lucene/search/WildcardQuery.java (working copy) @@ -30,8 +30,14 @@ * a Wildcard term should not start with one of the wildcards * or * ?. * - * @see WildcardTermEnum - */ + *

NOTE: Currently this query uses {@link + * MultiTermQuery#SCORING_BOOLEAN_QUERY_REWRITE}, which + * assigns not-very-useful scores to the resulting hits. In + * 3.0 this default will change to {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}; you can use {@link + * MultiTermQuery#setRewriteMethod} to change it. + * + * @see WildcardTermEnum */ public class WildcardQuery extends MultiTermQuery { private boolean termContainsWildcard; protected Term term; @@ -42,7 +48,7 @@ this.termContainsWildcard = (term.text().indexOf('*') != -1) || (term.text().indexOf('?') != -1); } - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + public FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new WildcardTermEnum(reader, getTerm()); } Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 795767) +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy) @@ -28,14 +28,11 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.FuzzyQuery; @@ -139,9 +136,9 @@ terms.putAll(disjunctTerms); } else if (query instanceof MultiTermQuery && (highlightCnstScrRngQuery || expandMultiTermQuery)) { MultiTermQuery 
mtq = ((MultiTermQuery)query); - if(mtq.getConstantScoreRewrite()) { + if(mtq.getRewriteMethod() == MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE) { mtq = copyMultiTermQuery(mtq); - mtq.setConstantScoreRewrite(false); + mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = mtq; } String field;