Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 712531) +++ CHANGES.txt (working copy) @@ -5,6 +5,12 @@ Changes in runtime behavior + 1. LUCENE-1424: QueryParser now by default uses constant score query + rewriting when it generates a WildcardQuery and PrefixQuery (it + already does so for RangeQuery, as well). Call + setConstantScoreRewrite(false) to revert to BooleanQuery rewriting + method. (Mark Miller via Mike McCandless) + API Changes 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is @@ -50,6 +56,13 @@ 5. Added web-based demo of functionality in contrib's XML Query Parser packaged as War file (Mark Harwood) + 6. LUCENE-1424: Moved constant score query rewrite capability into + MultiTermQuery, allowing RangeQuery, PrefixQuery and WildcardQuery + to switch between constant-score rewriting and BooleanQuery + expansion rewriting via a new setConstantScoreRewrite method. + Deprecated ConstantScoreRangeQuery (Mark Miller via Mike + McCandless) + Optimizations 1. 
LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 712531) +++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -415,11 +414,11 @@ public void testRange() throws Exception { assertQueryEquals("[ a TO z]", null, "[a TO z]"); - assertTrue(getQuery("[ a TO z]", null) instanceof ConstantScoreRangeQuery); + assertTrue(((RangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite()); QueryParser qp = new QueryParser("field", new SimpleAnalyzer()); - qp.setUseOldRangeQuery(true); - assertTrue(qp.parse("[ a TO z]") instanceof RangeQuery); + qp.setConstantScoreRewrite(false); + assertFalse(((RangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite()); assertQueryEquals("[ a TO z ]", null, "[a TO z]"); assertQueryEquals("{ a TO z}", null, "{a TO z}"); @@ -458,7 +457,7 @@ // supported). 
// Test ConstantScoreRangeQuery - qp.setUseOldRangeQuery(false); + qp.setConstantScoreRewrite(true); ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); @@ -466,7 +465,7 @@ assertEquals("The index Term should be included.", 1, result.length); // Test RangeQuery - qp.setUseOldRangeQuery(true); + qp.setConstantScoreRewrite(false); result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); @@ -775,7 +774,7 @@ public void assertParseException(String queryString) throws Exception { try { - Query q = getQuery(queryString, null); + getQuery(queryString, null); } catch (ParseException expected) { return; } Index: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 0) +++ src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 0) @@ -0,0 +1,567 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; + +import java.io.IOException; +import java.text.Collator; +import java.util.Locale; + +import junit.framework.Assert; + +public class TestMultiTermConstantScore extends BaseTestRangeFilter { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 1e-6f; + + public TestMultiTermConstantScore(String name) { + super(name); + } + + public TestMultiTermConstantScore() { + super(); + } + + Directory small; + + void assertEquals(String m, float e, float a) { + assertEquals(m, e, a, SCORE_COMP_THRESH); + } + + static public void assertEquals(String m, int e, int a) { + Assert.assertEquals(m, e, a); + } + + public void setUp() throws Exception { + super.setUp(); + + String[] data = new String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, + "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", + "X 4 5 6" }; + + small = new RAMDirectory(); + IndexWriter writer = new IndexWriter(small, new WhitespaceAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); + + for (int i = 0; i < data.length; i++) { + Document doc = new Document(); + doc.add(new Field("id", String.valueOf(i), Field.Store.YES, + Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i))); + doc + .add(new Field("all", "all", Field.Store.YES, + Field.Index.NOT_ANALYZED));// Field.Keyword("all","all")); + if (null != data[i]) { + doc.add(new Field("data", data[i], Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("data",data[i])); + } + writer.addDocument(doc); + } + + writer.optimize(); + writer.close(); + } + + /** macro for readability */ + public static Query csrq(String f, 
String l, String h, boolean il, boolean ih) { + RangeQuery query = new RangeQuery(f, l, h, il, ih); + query.setConstantScoreRewrite(true); + return query; + } + + /** macro for readability */ + public static Query csrq(String f, String l, String h, boolean il, + boolean ih, Collator c) { + RangeQuery query = new RangeQuery(f, l, h, il, ih, c); + query.setConstantScoreRewrite(true); + return query; + } + + /** macro for readability */ + public static Query cspq(Term prefix) { + PrefixQuery query = new PrefixQuery(prefix); + query.setConstantScoreRewrite(true); + return query; + } + + /** macro for readability */ + public static Query cswcq(Term wild) { + WildcardQuery query = new WildcardQuery(wild); + query.setConstantScoreRewrite(true); + return query; + } + + public void testBasics() throws IOException { + QueryUtils.check(csrq("data", "1", "6", T, T)); + QueryUtils.check(csrq("data", "A", "Z", T, T)); + QueryUtils.checkUnequal(csrq("data", "1", "6", T, T), csrq("data", "A", + "Z", T, T)); + + QueryUtils.check(cspq(new Term("data", "p*u?"))); + QueryUtils.checkUnequal(cspq(new Term("data", "pre*")), cspq(new Term( + "data", "pres*"))); + + QueryUtils.check(cswcq(new Term("data", "p"))); + QueryUtils.checkUnequal(cswcq(new Term("data", "pre*n?t")), cswcq(new Term( + "data", "pr*t?j"))); + } + + public void testBasicsRngCollating() throws IOException { + Collator c = Collator.getInstance(Locale.ENGLISH); + QueryUtils.check(csrq("data", "1", "6", T, T, c)); + QueryUtils.check(csrq("data", "A", "Z", T, T, c)); + QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A", + "Z", T, T, c)); + } + + public void testEqualScores() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + ScoreDoc[] result; + + // some hits match more terms then others, score should be the same + + result = search.search(csrq("data", "1", "6", T, T), null, 
1000).scoreDocs; + int numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + float score = result[0].score; + for (int i = 1; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score); + } + + } + + public void testBoost() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + // test for correct application of query normalization + // must use a non score normalizing method for this. + Query q = csrq("data", "1", "6", T, T); + q.setBoost(100); + search.search(q, null, new HitCollector() { + public void collect(int doc, float score) { + assertEquals("score for doc " + doc + " was not correct", 1.0f, score); + } + }); + + // + // Ensure that boosting works to score one clause of a query higher + // than another. + // + Query q1 = csrq("data", "A", "A", T, T); // matches document #0 + q1.setBoost(.1f); + Query q2 = csrq("data", "Z", "Z", T, T); // matches document #1 + BooleanQuery bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs; + assertEquals(1, hits[0].doc); + assertEquals(0, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + + q1 = csrq("data", "A", "A", T, T); // matches document #0 + q1.setBoost(10f); + q2 = csrq("data", "Z", "Z", T, T); // matches document #1 + bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + hits = search.search(bq, null, 1000).scoreDocs; + assertEquals(0, hits[0].doc); + assertEquals(1, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + } + + public void testBooleanOrderUnAffected() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); 
+ + // first do a regular RangeQuery which uses term expansion so + // docs with more terms in range get higher scores + + Query rq = new RangeQuery(new Term("data", "1"), new Term("data", "4"), T); + + ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; + int numHits = expected.length; + + // now do a boolean where which also contains a + // ConstantScoreRangeQuery and make sure hte order is the same + + BooleanQuery q = new BooleanQuery(); + q.add(rq, BooleanClause.Occur.MUST);// T, F); + q.add(csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST);// T, F); + + ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs; + + assertEquals("wrong numebr of hits", numHits, actual.length); + for (int i = 0; i < numHits; i++) { + assertEquals("mismatch in docid for hit#" + i, expected[i].doc, + actual[i].doc); + } + + } + + public void testRangeQueryId() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + // test id, bounded on both ends + + result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs; 
+ assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(csrq("id", null, maxIP, F, T), null, numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(csrq("id", minIP, null, F, F), null, numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(csrq("id", null, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, F), null, numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, F, T), null, numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F), null, 
numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + } + + public void testRangeQueryIdCollating() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + Collator c = Collator.getInstance(Locale.ENGLISH); + + // test id, bounded on both ends + + result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = 
search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("min,min,F,F,c", 0, result.length); + result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("med,med,F,F,c", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("max,max,F,F,c", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("min,min,T,T,c", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T,c", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("max,max,T,T,c", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T,c", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("med,med,T,T,c", 1, result.length); + } + + public void testRangeQueryRand() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + String minRP = pad(signedIndex.minR); + String 
maxRP = pad(signedIndex.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + // test extremes, bounded on both ends + + result = search.search(csrq("rand", minRP, maxRP, T, T), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("rand", minRP, maxRP, T, F), null, numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, T), null, numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(csrq("rand", minRP, null, T, F), null, numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(csrq("rand", null, maxRP, F, T), null, numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(csrq("rand", minRP, null, F, F), null, numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(csrq("rand", null, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(csrq("rand", minRP, minRP, F, F), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(csrq("rand", maxRP, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("rand", minRP, minRP, T, T), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(csrq("rand", null, minRP, F, T), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = 
search.search(csrq("rand", maxRP, maxRP, T, T), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(csrq("rand", maxRP, null, T, F), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + } + + public void testRangeQueryRandCollating() throws IOException { + // NOTE: uses index build in *super* setUp + + // using the unsigned index because collation seems to ignore hyphens + IndexReader reader = IndexReader.open(unsignedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + String minRP = pad(unsignedIndex.minR); + String maxRP = pad(unsignedIndex.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + Collator c = Collator.getInstance(Locale.ENGLISH); + + // test extremes, bounded on both ends + + result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(csrq("rand", null, maxRP, F, F, 
c), null, numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("min,min,F,F,c", 0, result.length); + result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("max,max,F,F,c", 0, result.length); + + result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("min,min,T,T,c", 1, result.length); + result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T,c", 1, result.length); + + result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("max,max,T,T,c", 1, result.length); + result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T,c", 1, result.length); + } + + public void testFarsi() throws Exception { + + /* build an index */ + RAMDirectory farsiIndex = new RAMDirectory(); + IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, + IndexWriter.MaxFieldLength.LIMITED); + Document doc = new Document(); + doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc + .add(new Field("body", "body", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + + writer.optimize(); + writer.close(); + + IndexReader reader = IndexReader.open(farsiIndex); + IndexSearcher search = new IndexSearcher(reader); + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. 
+ Collator c = Collator.getInstance(new Locale("ar")); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a ConstantScoreRangeQuery + // with a Farsi Collator (or an Arabic one for the case when Farsi is + // not supported). + ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T, + c), null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, + 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + search.close(); + } +} Property changes on: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/search/TestRangeQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestRangeQuery.java (revision 712531) +++ src/test/org/apache/lucene/search/TestRangeQuery.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; @@ -41,9 +40,7 @@ } public void testExclusive() throws Exception { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - false); + Query query = new RangeQuery("content", "A", "C", false, false); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -64,9 +61,7 @@ } public void testInclusive() throws Exception { - Query query = new RangeQuery(new 
Term("content", "A"), - new Term("content", "C"), - true); + Query query = new RangeQuery("content", "A", "C", true, true); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); @@ -88,13 +83,10 @@ } public void testEqualsHashcode() { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - true); + Query query = new RangeQuery("content", "A", "C", true, true); + query.setBoost(1.0f); - Query other = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - true); + Query other = new RangeQuery("content", "A", "C", true, true); other.setBoost(1.0f); assertEquals("query equals itself is true", query, query); @@ -104,38 +96,36 @@ other.setBoost(2.0f); assertFalse("Different boost queries are not equal", query.equals(other)); - other = new RangeQuery(new Term("notcontent", "A"), new Term("notcontent", "C"), true); + other = new RangeQuery("notcontent", "A", "C", true, true); assertFalse("Different fields are not equal", query.equals(other)); - other = new RangeQuery(new Term("content", "X"), new Term("content", "C"), true); + other = new RangeQuery("content", "X", "C", true, true); assertFalse("Different lower terms are not equal", query.equals(other)); - other = new RangeQuery(new Term("content", "A"), new Term("content", "Z"), true); + other = new RangeQuery("content", "A", "Z", true, true); assertFalse("Different upper terms are not equal", query.equals(other)); - query = new RangeQuery(null, new Term("content", "C"), true); - other = new RangeQuery(null, new Term("content", "C"), true); + query = new RangeQuery("content", null, "C", true, true); + other = new RangeQuery("content", null, "C", true, true); assertEquals("equivalent queries with null lowerterms are equal()", query, other); assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode()); - query = new RangeQuery(new Term("content", "C"), null, true); - other = new 
RangeQuery(new Term("content", "C"), null, true); + query = new RangeQuery("content", "C", null, true, true); + other = new RangeQuery("content", "C", null, true, true); assertEquals("equivalent queries with null upperterms are equal()", query, other); assertEquals("hashcode returns same value", query.hashCode(), other.hashCode()); - query = new RangeQuery(null, new Term("content", "C"), true); - other = new RangeQuery(new Term("content", "C"), null, true); + query = new RangeQuery("content", null, "C", true, true); + other = new RangeQuery("content", "C", null, true, true); assertFalse("queries with different upper and lower terms are not equal", query.equals(other)); - query = new RangeQuery(new Term("content", "A"), new Term("content", "C"), false); - other = new RangeQuery(new Term("content", "A"), new Term("content", "C"), true); + query = new RangeQuery("content", "A", "C", false, false); + other = new RangeQuery("content", "A", "C", true, true); assertFalse("queries with different inclusive are not equal", query.equals(other)); } public void testExclusiveCollating() throws Exception { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - false, Collator.getInstance(Locale.ENGLISH)); + Query query = new RangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -156,9 +146,7 @@ } public void testInclusiveCollating() throws Exception { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - true, Collator.getInstance(Locale.ENGLISH)); + Query query = new RangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); @@ -184,9 +172,7 @@ // RuleBasedCollator. 
However, the Arabic Locale seems to order the Farsi // characters properly. Collator collator = Collator.getInstance(new Locale("ar")); - Query query = new RangeQuery(new Term("content", "\u062F"), - new Term("content", "\u0698"), - true, collator); + Query query = new RangeQuery("content", "\u062F", "\u0698", true, true, collator); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a RangeQuery with a Farsi @@ -196,9 +182,7 @@ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); - query = new RangeQuery(new Term("content", "\u0633"), - new Term("content", "\u0638"), - true, collator); + query = new RangeQuery("content", "\u0633", "\u0638",true, true, collator); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); searcher.close(); Index: src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java (revision 712531) +++ src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java (working copy) @@ -1,550 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; - -import java.io.IOException; -import java.text.Collator; -import java.util.Locale; - -import junit.framework.Assert; - -public class TestConstantScoreRangeQuery extends BaseTestRangeFilter { - - /** threshold for comparing floats */ - public static final float SCORE_COMP_THRESH = 1e-6f; - - public TestConstantScoreRangeQuery(String name) { - super(name); - } - public TestConstantScoreRangeQuery() { - super(); - } - - Directory small; - - void assertEquals(String m, float e, float a) { - assertEquals(m, e, a, SCORE_COMP_THRESH); - } - - static public void assertEquals(String m, int e, int a) { - Assert.assertEquals(m, e, a); - } - - public void setUp() throws Exception { - super.setUp(); - - String[] data = new String [] { - "A 1 2 3 4 5 6", - "Z 4 5 6", - null, - "B 2 4 5 6", - "Y 3 5 6", - null, - "C 3 6", - "X 4 5 6" - }; - - small = new RAMDirectory(); - IndexWriter writer = new IndexWriter(small, new WhitespaceAnalyzer(), true, - IndexWriter.MaxFieldLength.LIMITED); - - for (int i = 0; i < data.length; i++) { - Document doc = new Document(); - doc.add(new Field("id", String.valueOf(i), Field.Store.YES, 
Field.Index.NOT_ANALYZED));//Field.Keyword("id",String.valueOf(i))); - doc.add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));//Field.Keyword("all","all")); - if (null != data[i]) { - doc.add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED));//Field.Text("data",data[i])); - } - writer.addDocument(doc); - } - - writer.optimize(); - writer.close(); - } - - - - /** macro for readability */ - public static Query csrq(String f, String l, String h, - boolean il, boolean ih) { - return new ConstantScoreRangeQuery(f,l,h,il,ih); - } - - /** macro for readability */ - public static Query csrq(String f, String l, String h, - boolean il, boolean ih, Collator c) { - return new ConstantScoreRangeQuery(f,l,h,il,ih,c); - } - - public void testBasics() throws IOException { - QueryUtils.check(csrq("data","1","6",T,T)); - QueryUtils.check(csrq("data","A","Z",T,T)); - QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T)); - } - - public void testBasicsCollating() throws IOException { - Collator c = Collator.getInstance(Locale.ENGLISH); - QueryUtils.check(csrq("data","1","6",T,T,c)); - QueryUtils.check(csrq("data","A","Z",T,T,c)); - QueryUtils.checkUnequal(csrq("data","1","6",T,T,c), csrq("data","A","Z",T,T,c)); - } - - public void testEqualScores() throws IOException { - // NOTE: uses index build in *this* setUp - - IndexReader reader = IndexReader.open(small); - IndexSearcher search = new IndexSearcher(reader); - - ScoreDoc[] result; - - // some hits match more terms then others, score should be the same - - result = search.search(csrq("data","1","6",T,T), null, 1000).scoreDocs; - int numHits = result.length; - assertEquals("wrong number of results", 6, numHits); - float score = result[0].score; - for (int i = 1; i < numHits; i++) { - assertEquals("score for " + i +" was not the same", - score, result[i].score); - } - - } - - public void testBoost() throws IOException { - // NOTE: uses index build in *this* setUp - - 
IndexReader reader = IndexReader.open(small); - IndexSearcher search = new IndexSearcher(reader); - - // test for correct application of query normalization - // must use a non score normalizing method for this. - Query q = csrq("data","1","6",T,T); - q.setBoost(100); - search.search(q,null, new HitCollector() { - public void collect(int doc, float score) { - assertEquals("score for doc " + doc +" was not correct", - 1.0f, score); - } - }); - - - // - // Ensure that boosting works to score one clause of a query higher - // than another. - // - Query q1 = csrq("data","A","A",T,T); // matches document #0 - q1.setBoost(.1f); - Query q2 = csrq("data","Z","Z",T,T); // matches document #1 - BooleanQuery bq = new BooleanQuery(true); - bq.add(q1, BooleanClause.Occur.SHOULD); - bq.add(q2, BooleanClause.Occur.SHOULD); - - ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs; - assertEquals(1, hits[0].doc); - assertEquals(0, hits[1].doc); - assertTrue(hits[0].score > hits[1].score); - - q1 = csrq("data","A","A",T,T); // matches document #0 - q1.setBoost(10f); - q2 = csrq("data","Z","Z",T,T); // matches document #1 - bq = new BooleanQuery(true); - bq.add(q1, BooleanClause.Occur.SHOULD); - bq.add(q2, BooleanClause.Occur.SHOULD); - - hits = search.search(bq, null, 1000).scoreDocs; - assertEquals(0, hits[0].doc); - assertEquals(1, hits[1].doc); - assertTrue(hits[0].score > hits[1].score); - } - - - public void testBooleanOrderUnAffected() throws IOException { - // NOTE: uses index build in *this* setUp - - IndexReader reader = IndexReader.open(small); - IndexSearcher search = new IndexSearcher(reader); - - // first do a regular RangeQuery which uses term expansion so - // docs with more terms in range get higher scores - - Query rq = new RangeQuery(new Term("data","1"),new Term("data","4"),T); - - ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; - int numHits = expected.length; - - // now do a boolean where which also contains a - // ConstantScoreRangeQuery and 
make sure hte order is the same - - BooleanQuery q = new BooleanQuery(); - q.add(rq, BooleanClause.Occur.MUST);//T, F); - q.add(csrq("data","1","6", T, T), BooleanClause.Occur.MUST);//T, F); - - ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs; - - assertEquals("wrong numebr of hits", numHits, actual.length); - for (int i = 0; i < numHits; i++) { - assertEquals("mismatch in docid for hit#"+i, - expected[i].doc, actual[i].doc); - } - - } - - - - - - public void testRangeQueryId() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = IndexReader.open(signedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - int medId = ((maxId - minId) / 2); - - String minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - // test id, bounded on both ends - - result = search.search(csrq("id",minIP,maxIP,T,T), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("id",minIP,maxIP,T,F), null, numDocs).scoreDocs; - assertEquals("all but last", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,T), null, numDocs).scoreDocs; - assertEquals("all but first", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,F), null, numDocs).scoreDocs; - assertEquals("all but ends", numDocs-2, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,T), null, numDocs).scoreDocs; - assertEquals("med and up", 1+ maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,T,T), null, numDocs).scoreDocs; - assertEquals("up to med", 1+ medId-minId, result.length); - - // unbounded id - - result = search.search(csrq("id",minIP,null,T,F), null, numDocs).scoreDocs; - assertEquals("min and up", numDocs, result.length); - - result = search.search(csrq("id",null,maxIP,F,T), 
null, numDocs).scoreDocs; - assertEquals("max and down", numDocs, result.length); - - result = search.search(csrq("id",minIP,null,F,F), null, numDocs).scoreDocs; - assertEquals("not min, but up", numDocs-1, result.length); - - result = search.search(csrq("id",null,maxIP,F,F), null, numDocs).scoreDocs; - assertEquals("not max, but down", numDocs-1, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,F), null, numDocs).scoreDocs; - assertEquals("med and up, not max", maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,F,T), null, numDocs).scoreDocs; - assertEquals("not min, up to med", medId-minId, result.length); - - // very small sets - - result = search.search(csrq("id",minIP,minIP,F,F), null, numDocs).scoreDocs; - assertEquals("min,min,F,F", 0, result.length); - result = search.search(csrq("id",medIP,medIP,F,F), null, numDocs).scoreDocs; - assertEquals("med,med,F,F", 0, result.length); - result = search.search(csrq("id",maxIP,maxIP,F,F), null, numDocs).scoreDocs; - assertEquals("max,max,F,F", 0, result.length); - - result = search.search(csrq("id",minIP,minIP,T,T), null, numDocs).scoreDocs; - assertEquals("min,min,T,T", 1, result.length); - result = search.search(csrq("id",null,minIP,F,T), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T", 1, result.length); - - result = search.search(csrq("id",maxIP,maxIP,T,T), null, numDocs).scoreDocs; - assertEquals("max,max,T,T", 1, result.length); - result = search.search(csrq("id",maxIP,null,T,F), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T", 1, result.length); - - result = search.search(csrq("id",medIP,medIP,T,T), null, numDocs).scoreDocs; - assertEquals("med,med,T,T", 1, result.length); - - } - - - public void testRangeQueryIdCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = IndexReader.open(signedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - int medId = ((maxId - minId) / 2); - - String 
minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test id, bounded on both ends - - result = search.search(csrq("id",minIP,maxIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("id",minIP,maxIP,T,F,c), null, numDocs).scoreDocs; - assertEquals("all but last", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("all but first", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("all but ends", numDocs-2, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("med and up", 1+ maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("up to med", 1+ medId-minId, result.length); - - // unbounded id - - result = search.search(csrq("id",minIP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("min and up", numDocs, result.length); - - result = search.search(csrq("id",null,maxIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("max and down", numDocs, result.length); - - result = search.search(csrq("id",minIP,null,F,F,c), null, numDocs).scoreDocs; - assertEquals("not min, but up", numDocs-1, result.length); - - result = search.search(csrq("id",null,maxIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("not max, but down", numDocs-1, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,F,c), null, numDocs).scoreDocs; - assertEquals("med and up, not max", maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("not min, up to med", medId-minId, 
result.length); - - // very small sets - - result = search.search(csrq("id",minIP,minIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("id",medIP,medIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("med,med,F,F,c", 0, result.length); - result = search.search(csrq("id",maxIP,maxIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("id",minIP,minIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("id",null,minIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("id",maxIP,maxIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("id",maxIP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - - result = search.search(csrq("id",medIP,medIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("med,med,T,T,c", 1, result.length); - } - - - public void testRangeQueryRand() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = IndexReader.open(signedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - String minRP = pad(signedIndex.minR); - String maxRP = pad(signedIndex.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - // test extremes, bounded on both ends - - result = search.search(csrq("rand",minRP,maxRP,T,T), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("rand",minRP,maxRP,T,F), null, numDocs).scoreDocs; - assertEquals("all but biggest", numDocs-1, result.length); - - result = search.search(csrq("rand",minRP,maxRP,F,T), null, numDocs).scoreDocs; - assertEquals("all but smallest", numDocs-1, result.length); - - 
result = search.search(csrq("rand",minRP,maxRP,F,F), null, numDocs).scoreDocs; - assertEquals("all but extremes", numDocs-2, result.length); - - // unbounded - - result = search.search(csrq("rand",minRP,null,T,F), null, numDocs).scoreDocs; - assertEquals("smallest and up", numDocs, result.length); - - result = search.search(csrq("rand",null,maxRP,F,T), null, numDocs).scoreDocs; - assertEquals("biggest and down", numDocs, result.length); - - result = search.search(csrq("rand",minRP,null,F,F), null, numDocs).scoreDocs; - assertEquals("not smallest, but up", numDocs-1, result.length); - - result = search.search(csrq("rand",null,maxRP,F,F), null, numDocs).scoreDocs; - assertEquals("not biggest, but down", numDocs-1, result.length); - - // very small sets - - result = search.search(csrq("rand",minRP,minRP,F,F), null, numDocs).scoreDocs; - assertEquals("min,min,F,F", 0, result.length); - result = search.search(csrq("rand",maxRP,maxRP,F,F), null, numDocs).scoreDocs; - assertEquals("max,max,F,F", 0, result.length); - - result = search.search(csrq("rand",minRP,minRP,T,T), null, numDocs).scoreDocs; - assertEquals("min,min,T,T", 1, result.length); - result = search.search(csrq("rand",null,minRP,F,T), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T", 1, result.length); - - result = search.search(csrq("rand",maxRP,maxRP,T,T), null, numDocs).scoreDocs; - assertEquals("max,max,T,T", 1, result.length); - result = search.search(csrq("rand",maxRP,null,T,F), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T", 1, result.length); - - } - - public void testRangeQueryRandCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - // using the unsigned index because collation seems to ignore hyphens - IndexReader reader = IndexReader.open(unsignedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - String minRP = pad(unsignedIndex.minR); - String maxRP = pad(unsignedIndex.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of 
docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test extremes, bounded on both ends - - result = search.search(csrq("rand",minRP,maxRP,T,T,c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("rand",minRP,maxRP,T,F,c), null, numDocs).scoreDocs; - assertEquals("all but biggest", numDocs-1, result.length); - - result = search.search(csrq("rand",minRP,maxRP,F,T,c), null, numDocs).scoreDocs; - assertEquals("all but smallest", numDocs-1, result.length); - - result = search.search(csrq("rand",minRP,maxRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("all but extremes", numDocs-2, result.length); - - // unbounded - - result = search.search(csrq("rand",minRP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("smallest and up", numDocs, result.length); - - result = search.search(csrq("rand",null,maxRP,F,T,c), null, numDocs).scoreDocs; - assertEquals("biggest and down", numDocs, result.length); - - result = search.search(csrq("rand",minRP,null,F,F,c), null, numDocs).scoreDocs; - assertEquals("not smallest, but up", numDocs-1, result.length); - - result = search.search(csrq("rand",null,maxRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("not biggest, but down", numDocs-1, result.length); - - // very small sets - - result = search.search(csrq("rand",minRP,minRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("rand",maxRP,maxRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("rand",minRP,minRP,T,T,c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("rand",null,minRP,F,T,c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("rand",maxRP,maxRP,T,T,c), null, numDocs).scoreDocs; - 
assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("rand",maxRP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - } - - public void testFarsi() throws Exception { - - /* build an index */ - RAMDirectory farsiIndex = new RAMDirectory(); - IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, - IndexWriter.MaxFieldLength.LIMITED); - Document doc = new Document(); - doc.add(new Field("content","\u0633\u0627\u0628", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - - writer.optimize(); - writer.close(); - - IndexReader reader = IndexReader.open(farsiIndex); - IndexSearcher search = new IndexSearcher(reader); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator c = Collator.getInstance(new Locale("ar")); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not supported). 
- ScoreDoc[] result = search.search(csrq("content","\u062F", "\u0698", T, T, c), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - search.close(); - } -} Index: src/java/org/apache/lucene/queryParser/Token.java =================================================================== --- src/java/org/apache/lucene/queryParser/Token.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/Token.java (working copy) @@ -121,4 +121,4 @@ } } -/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */ +/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/TokenMgrError.java =================================================================== --- src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy) @@ -137,4 +137,4 @@ this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */ +/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FuzzyQuery; import 
org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; @@ -94,7 +93,6 @@ *

* *

Note that QueryParser is not thread-safe.

- * */ public class QueryParser implements QueryParserConstants { @@ -117,7 +115,7 @@ private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = true; - boolean useOldRangeQuery= false; + boolean constantScoreRewrite= true; boolean allowLeadingWildcard = false; boolean enablePositionIncrements = false; @@ -134,7 +132,7 @@ Map fieldToDateResolution = null; // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys and ConstantScoreRangeQuerys. + // for use when constructing RangeQuerys. Collator rangeCollator = null; /** The default operator for parsing queries. @@ -324,27 +322,43 @@ } /** - * By default QueryParser uses new ConstantScoreRangeQuery in preference to RangeQuery - * for range queries. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of range terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the old-fashioned RangeQuery and the above - * points are not required then set this option to true - * Default is false. + * @deprecated Please use {@link #setConstantScoreRewrite} instead. */ public void setUseOldRangeQuery(boolean useOldRangeQuery) { - this.useOldRangeQuery = useOldRangeQuery; + constantScoreRewrite = !useOldRangeQuery; } /** - * @see #setUseOldRangeQuery(boolean) + * @deprecated Please use {@link #getConstantScoreRewrite} instead. */ public boolean getUseOldRangeQuery() { - return useOldRangeQuery; + return !constantScoreRewrite; } /** + * By default QueryParser uses constant-score rewriting + * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + * a) Runs faster b) Does not have the scarcity of terms unduly influence score + * c) avoids any "TooManyBooleanClauses" exception. 
+ * However, if your application really needs to use the + * old-fashioned BooleanQuery expansion rewriting and the above + * points are not relevant then set this option to false. + * Default is true. + */ + public void setConstantScoreRewrite(boolean v) { + constantScoreRewrite = v; + } + + + /** + * @see #setConstantScoreRewrite(boolean) + */ + public boolean getConstantScoreRewrite() { + return constantScoreRewrite; + } + + /** * Set locale used by date range parsing. */ public void setLocale(Locale locale) { @@ -415,9 +429,7 @@ /** * Sets the collator used to determine index term inclusion in ranges - * specified either for ConstantScoreRangeQuerys or RangeQuerys (if - * {@link #setUseOldRangeQuery(boolean)} is called with a true - * value.) + * for RangeQuerys. *

* WARNING: Setting the rangeCollator to a non-null * collator using this method will cause every single index Term in the @@ -426,7 +438,6 @@ * be very slow. * * @param rc the collator to use when constructing RangeQuerys - * and ConstantScoreRangeQuerys */ public void setRangeCollator(Collator rc) { rangeCollator = rc; @@ -434,9 +445,7 @@ /** * @return the collator used to determine index term inclusion in ranges - * specified either for ConstantScoreRangeQuerys or RangeQuerys (if - * {@link #setUseOldRangeQuery(boolean)} is called with a true - * value.) + * for RangeQuerys. */ public Collator getRangeCollator() { return rangeCollator; @@ -718,7 +727,9 @@ * @return new PrefixQuery instance */ protected Query newPrefixQuery(Term prefix){ - return new PrefixQuery(prefix); + PrefixQuery query = new PrefixQuery(prefix); + query.setConstantScoreRewrite(constantScoreRewrite); + return query; } /** @@ -729,6 +740,7 @@ * @return new FuzzyQuery Instance */ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { + // FuzzyQuery doesn't yet allow constant score rewrite return new FuzzyQuery(term,minimumSimilarity,prefixLength); } @@ -741,17 +753,9 @@ * @return new RangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { - if(useOldRangeQuery) - { - return new RangeQuery(new Term(field, part1), - new Term(field, part2), - inclusive, rangeCollator); - } - else - { - return new ConstantScoreRangeQuery - (field, part1, part2, inclusive, inclusive, rangeCollator); - } + RangeQuery query = new RangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + query.setConstantScoreRewrite(constantScoreRewrite); + return query; } /** @@ -768,7 +772,9 @@ * @return new WildcardQuery instance */ protected Query newWildcardQuery(Term t) { - return new WildcardQuery(t); + WildcardQuery query = new WildcardQuery(t); + query.setConstantScoreRewrite(constantScoreRewrite); + return query; } /** @@ 
-1245,7 +1251,6 @@ boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; - boolean rangein = false; Query q; switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case STAR: @@ -1490,12 +1495,6 @@ finally { jj_save(0, xla); } } - private boolean jj_3R_3() { - if (jj_scan_token(STAR)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - private boolean jj_3R_2() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(COLON)) return true; @@ -1512,6 +1511,12 @@ return false; } + private boolean jj_3R_3() { + if (jj_scan_token(STAR)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ Index: src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 712531) +++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -45,7 +45,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; @@ -118,10 +117,6 @@ *

* *

Note that QueryParser is not thread-safe.

- * - * @author Brian Goetz - * @author Peter Halacsy - * @author Tatu Saloranta */ public class QueryParser { @@ -144,7 +139,7 @@ private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = true; - boolean useOldRangeQuery= false; + boolean constantScoreRewrite= true; boolean allowLeadingWildcard = false; boolean enablePositionIncrements = false; @@ -161,7 +156,7 @@ Map fieldToDateResolution = null; // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys and ConstantScoreRangeQuerys. + // for use when constructing RangeQuerys. Collator rangeCollator = null; /** The default operator for parsing queries. @@ -351,27 +346,43 @@ } /** - * By default QueryParser uses new ConstantScoreRangeQuery in preference to RangeQuery - * for range queries. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of range terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the old-fashioned RangeQuery and the above - * points are not required then set this option to true - * Default is false. + * @deprecated Please use {@link #setConstantScoreRewrite} instead. */ public void setUseOldRangeQuery(boolean useOldRangeQuery) { - this.useOldRangeQuery = useOldRangeQuery; + constantScoreRewrite = !useOldRangeQuery; } /** - * @see #setUseOldRangeQuery(boolean) + * @deprecated Please use {@link #getConstantScoreRewrite} instead. */ public boolean getUseOldRangeQuery() { - return useOldRangeQuery; + return !constantScoreRewrite; } /** + * By default QueryParser uses constant-score rewriting + * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + * a) Runs faster b) Does not have the scarcity of terms unduly influence score + * c) avoids any "TooManyBooleanClauses" exception. 
+ * However, if your application really needs to use the + * old-fashioned BooleanQuery expansion rewriting and the above + * points are not relevant then set this option to false. + * Default is true. + */ + public void setConstantScoreRewrite(boolean v) { + constantScoreRewrite = v; + } + + + /** + * @see #setConstantScoreRewrite(boolean) + */ + public boolean getConstantScoreRewrite() { + return constantScoreRewrite; + } + + /** * Set locale used by date range parsing. */ public void setLocale(Locale locale) { @@ -442,9 +453,7 @@ /** * Sets the collator used to determine index term inclusion in ranges - * specified either for ConstantScoreRangeQuerys or RangeQuerys (if - * {@link #setUseOldRangeQuery(boolean)} is called with a true - * value.) + * for RangeQuerys. *

* WARNING: Setting the rangeCollator to a non-null * collator using this method will cause every single index Term in the @@ -453,7 +462,6 @@ * be very slow. * * @param rc the collator to use when constructing RangeQuerys - * and ConstantScoreRangeQuerys */ public void setRangeCollator(Collator rc) { rangeCollator = rc; @@ -461,9 +469,7 @@ /** * @return the collator used to determine index term inclusion in ranges - * specified either for ConstantScoreRangeQuerys or RangeQuerys (if - * {@link #setUseOldRangeQuery(boolean)} is called with a true - * value.) + * for RangeQuerys. */ public Collator getRangeCollator() { return rangeCollator; @@ -745,7 +751,9 @@ * @return new PrefixQuery instance */ protected Query newPrefixQuery(Term prefix){ - return new PrefixQuery(prefix); + PrefixQuery query = new PrefixQuery(prefix); + query.setConstantScoreRewrite(constantScoreRewrite); + return query; } /** @@ -756,6 +764,7 @@ * @return new FuzzyQuery Instance */ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { + // FuzzyQuery doesn't yet allow constant score rewrite return new FuzzyQuery(term,minimumSimilarity,prefixLength); } @@ -768,17 +777,9 @@ * @return new RangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { - if(useOldRangeQuery) - { - return new RangeQuery(new Term(field, part1), - new Term(field, part2), - inclusive, rangeCollator); - } - else - { - return new ConstantScoreRangeQuery - (field, part1, part2, inclusive, inclusive, rangeCollator); - } + RangeQuery query = new RangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + query.setConstantScoreRewrite(constantScoreRewrite); + return query; } /** @@ -795,7 +796,9 @@ * @return new WildcardQuery instance */ protected Query newWildcardQuery(Term t) { - return new WildcardQuery(t); + WildcardQuery query = new WildcardQuery(t); + query.setConstantScoreRewrite(constantScoreRewrite); + return query; } /** @@ 
-1247,7 +1250,6 @@ boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; - boolean rangein = false; Query q; } { Index: src/java/org/apache/lucene/queryParser/CharStream.java =================================================================== --- src/java/org/apache/lucene/queryParser/CharStream.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy) @@ -109,4 +109,4 @@ void Done(); } -/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */ +/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/ParseException.java =================================================================== --- src/java/org/apache/lucene/queryParser/ParseException.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/ParseException.java (working copy) @@ -195,4 +195,4 @@ } } -/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */ +/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -19,7 +19,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 
712531) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -18,81 +18,197 @@ */ import java.io.IOException; +import java.util.BitSet; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.ToStringUtils; /** * A {@link Query} that matches documents containing a subset of terms provided * by a {@link FilteredTermEnum} enumeration. *

- * MultiTermQuery is not designed to be used by itself. - *
+ * MultiTermQuery is not designed to be used by itself.
* The reason being that it is not intialized with a {@link FilteredTermEnum} * enumeration. A {@link FilteredTermEnum} enumeration needs to be provided. *

* For example, {@link WildcardQuery} and {@link FuzzyQuery} extend * MultiTermQuery to provide {@link WildcardTermEnum} and * {@link FuzzyTermEnum}, respectively. + * + * The pattern Term may be null. A query that uses a null pattern Term should + * override equals and hashcode. */ public abstract class MultiTermQuery extends Query { - private Term term; + protected Term term; + protected boolean constantScoreRewrite = false; - /** Constructs a query for terms matching term. */ - public MultiTermQuery(Term term) { - this.term = term; - } + /** Constructs a query for terms matching term. */ + public MultiTermQuery(Term term) { + this.term = term; + } - /** Returns the pattern term. */ - public Term getTerm() { return term; } + /** + * Constructs a query matching terms that cannot be represented with a single + * Term. + */ + public MultiTermQuery() { + } - /** Construct the enumeration to be used, expanding the pattern term. */ - protected abstract FilteredTermEnum getEnum(IndexReader reader) + /** Returns the pattern term. */ + public Term getTerm() { + return term; + } + + /** Construct the enumeration to be used, expanding the pattern term. 
*/ + protected abstract FilteredTermEnum getEnum(IndexReader reader) throws IOException; - public Query rewrite(IndexReader reader) throws IOException { + protected Filter getFilter() { + return new MultiTermFilter(this); + } + + public Query rewrite(IndexReader reader) throws IOException { + if (!constantScoreRewrite) { FilteredTermEnum enumerator = getEnum(reader); BooleanQuery query = new BooleanQuery(true); try { do { Term t = enumerator.term(); if (t != null) { - TermQuery tq = new TermQuery(t); // found a match + TermQuery tq = new TermQuery(t); // found a match tq.setBoost(getBoost() * enumerator.difference()); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query + query.add(tq, BooleanClause.Occur.SHOULD); // add to query } } while (enumerator.next()); } finally { enumerator.close(); } return query; + } else { + Query query = new ConstantScoreQuery(getFilter()); + query.setBoost(getBoost()); + return query; } + } - /** Prints a user-readable version of this query. */ - public String toString(String field) { - StringBuffer buffer = new StringBuffer(); - if (!term.field().equals(field)) { - buffer.append(term.field()); - buffer.append(":"); + /** Prints a user-readable version of this query. 
*/ + public String toString(String field) { + StringBuffer buffer = new StringBuffer(); + if (term != null) { + if (!term.field().equals(field)) { + buffer.append(term.field()); + buffer.append(":"); + } + buffer.append(term.text()); + } else { + buffer.append("termPattern:unknown"); + } + buffer.append(ToStringUtils.boost(getBoost())); + return buffer.toString(); + } + + public boolean getConstantScoreRewrite() { + return constantScoreRewrite; + } + + public void setConstantScoreRewrite(boolean constantScoreRewrite) { + this.constantScoreRewrite = constantScoreRewrite; + } + + public boolean equals(Object o) { + if (o == null || term == null) { + throw new UnsupportedOperationException( + "MultiTermQuerys that do not use a pattern term need to override equals/hashcode"); + } + + if (this == o) + return true; + if (!(o instanceof MultiTermQuery)) + return false; + + final MultiTermQuery multiTermQuery = (MultiTermQuery) o; + + if (!term.equals(multiTermQuery.term)) + return false; + + return getBoost() == multiTermQuery.getBoost(); + } + + public int hashCode() { + if (term == null) { + throw new UnsupportedOperationException( + "MultiTermQuerys that do not use a pattern term need to override equals/hashcode"); + } + return term.hashCode() + Float.floatToRawIntBits(getBoost()); + } + + static class MultiTermFilter extends Filter { + MultiTermQuery mtq; + + abstract class TermGenerator { + public void generate(IndexReader reader) throws IOException { + TermEnum enumerator = mtq.getEnum(reader); + TermDocs termDocs = reader.termDocs(); + try { + do { + Term term = enumerator.term(); + if (term == null) + break; + termDocs.seek(term); + while (termDocs.next()) { + handleDoc(termDocs.doc()); + } + } while (enumerator.next()); + } finally { + termDocs.close(); + enumerator.close(); } - buffer.append(term.text()); - buffer.append(ToStringUtils.boost(getBoost())); - return buffer.toString(); + } + abstract public void handleDoc(int doc); } + + public 
MultiTermFilter(MultiTermQuery mtq) { + this.mtq = mtq; + } + public BitSet bits(IndexReader reader) throws IOException { + final BitSet bitSet = new BitSet(reader.maxDoc()); + new TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader); + return bitSet; + } + + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); + new TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader); + + return bitSet; + } + public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof MultiTermQuery)) return false; - final MultiTermQuery multiTermQuery = (MultiTermQuery) o; + if (this == o) + return true; + if (!(o instanceof MultiTermFilter)) + return false; - if (!term.equals(multiTermQuery.term)) return false; - - return getBoost() == multiTermQuery.getBoost(); + final MultiTermFilter filter = (MultiTermFilter) o; + return mtq.equals(filter.mtq); } - + public int hashCode() { - return term.hashCode() + Float.floatToRawIntBits(getBoost()); + return mtq.hashCode(); } + } } Index: src/java/org/apache/lucene/search/PrefixTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 0) +++ src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.search; + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +/** + * Subclass of FilteredTermEnum for enumerating all terms that match the + * specified prefix filter term. + *

+ * Term enumerations are always ordered by Term.compareTo(). Each term in + * the enumeration is greater than all that precede it. + * + */ +public class PrefixTermEnum extends FilteredTermEnum { + + private Term prefix; + private boolean endEnum = false; + + public PrefixTermEnum(IndexReader reader, Term prefix) throws IOException { + this.prefix = prefix; + + setEnum(reader.terms(new Term(prefix.field(), prefix.text()))); + } + + public float difference() { + return 1.0f; + } + + protected boolean endEnum() { + return endEnum; + } + + protected boolean termCompare(Term term) { + if (term.field() == prefix.field() && term.text().startsWith(prefix.text())) { + return true; + } + endEnum = true; + return false; + } +} Property changes on: src/java/org/apache/lucene/search/PrefixTermEnum.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/search/FuzzyQuery.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyQuery.java (revision 712531) +++ src/java/org/apache/lucene/search/FuzzyQuery.java (working copy) @@ -99,6 +99,10 @@ protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); } + + public void setConstantScoreRewrite(boolean constantScoreRewrite) { + throw new UnsupportedOperationException("FuzzyQuery cannot rewrite to a constant score query"); + } public Query rewrite(IndexReader reader) throws IOException { FilteredTermEnum enumerator = getEnum(reader); Index: src/java/org/apache/lucene/search/RangeFilter.java =================================================================== --- src/java/org/apache/lucene/search/RangeFilter.java (revision 712531) +++ src/java/org/apache/lucene/search/RangeFilter.java (working copy) @@ -18,10 +18,6 @@ */ import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; 
-import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.util.OpenBitSet; import java.io.IOException; import java.util.BitSet; @@ -44,6 +40,7 @@ private boolean includeLower; private boolean includeUpper; private Collator collator; + private RangeQuery rangeQuery; /** * @param fieldName The field this range applies to @@ -75,6 +72,7 @@ throw new IllegalArgumentException ("The upper bound must be non-null to be inclusive"); } + initRangeQuery(); } /** @@ -99,8 +97,13 @@ Collator collator) { this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper); this.collator = collator; + initRangeQuery(); } + private void initRangeQuery() { + rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator); + } + /** * Constructs a filter for field fieldName matching * less than or equal to upperTerm. @@ -124,81 +127,7 @@ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. */ public BitSet bits(IndexReader reader) throws IOException { - BitSet bits = new BitSet(reader.maxDoc()); - TermEnum enumerator = - (null != lowerTerm && collator == null - ? reader.terms(new Term(fieldName, lowerTerm)) - : reader.terms(new Term(fieldName))); - - try { - - if (enumerator.term() == null) { - return bits; - } - - TermDocs termDocs = reader.termDocs(); - try { - if (collator != null) { - do { - Term term = enumerator.term(); - if (term != null && term.field().equals(fieldName)) { - if ((lowerTerm == null - || (includeLower - ? collator.compare(term.text(), lowerTerm) >= 0 - : collator.compare(term.text(), lowerTerm) > 0)) - && (upperTerm == null - || (includeUpper - ? 
collator.compare(term.text(), upperTerm) <= 0 - : collator.compare(term.text(), upperTerm) < 0))) { - /* we have a good term, find the docs */ - termDocs.seek(enumerator.term()); - while (termDocs.next()) { - bits.set(termDocs.doc()); - } - } - } - } - while (enumerator.next()); - } else { // collator is null - use Unicode code point ordering - boolean checkLower = false; - if (!includeLower) // make adjustments to set to exclusive - checkLower = true; - - do { - Term term = enumerator.term(); - if (term != null && term.field().equals(fieldName)) { - if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) { - checkLower = false; - if (upperTerm != null) { - int compare = upperTerm.compareTo(term.text()); - /* if beyond the upper term, or is exclusive and - * this is equal to the upper term, break out */ - if ((compare < 0) || - (!includeUpper && compare==0)) { - break; - } - } - /* we have a good term, find the docs */ - - termDocs.seek(enumerator.term()); - while (termDocs.next()) { - bits.set(termDocs.doc()); - } - } - } else { - break; - } - } - while (enumerator.next()); - } - } finally { - termDocs.close(); - } - } finally { - enumerator.close(); - } - - return bits; + return rangeQuery.getFilter().bits(reader); } /** @@ -206,84 +135,7 @@ * permitted in search results. */ public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - - TermEnum enumerator = - (null != lowerTerm && collator == null - ? reader.terms(new Term(fieldName, lowerTerm)) - : reader.terms(new Term(fieldName))); - - try { - - if (enumerator.term() == null) { - return bits; - } - - TermDocs termDocs = reader.termDocs(); - - try { - if (collator != null) { - do { - Term term = enumerator.term(); - if (term != null && term.field().equals(fieldName)) { - if ((lowerTerm == null - || (includeLower - ? 
collator.compare(term.text(), lowerTerm) >= 0 - : collator.compare(term.text(), lowerTerm) > 0)) - && (upperTerm == null - || (includeUpper - ? collator.compare(term.text(), upperTerm) <= 0 - : collator.compare(term.text(), upperTerm) < 0))) { - /* we have a good term, find the docs */ - termDocs.seek(enumerator.term()); - while (termDocs.next()) { - bits.set(termDocs.doc()); - } - } - } - } - while (enumerator.next()); - } else { // collator is null - use Unicode code point ordering - boolean checkLower = false; - if (!includeLower) // make adjustments to set to exclusive - checkLower = true; - - do { - Term term = enumerator.term(); - if (term != null && term.field().equals(fieldName)) { - if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) { - checkLower = false; - if (upperTerm != null) { - int compare = upperTerm.compareTo(term.text()); - /* if beyond the upper term, or is exclusive and - * this is equal to the upper term, break out */ - if ((compare < 0) || - (!includeUpper && compare==0)) { - break; - } - } - /* we have a good term, find the docs */ - - termDocs.seek(enumerator.term()); - while (termDocs.next()) { - bits.set(termDocs.doc()); - } - } - } else { - break; - } - } - while (enumerator.next()); - } - - } finally { - termDocs.close(); - } - } finally { - enumerator.close(); - } - - return bits; + return rangeQuery.getFilter().getDocIdSet(reader); } public String toString() { Index: src/java/org/apache/lucene/search/RangeTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/RangeTermEnum.java (revision 0) +++ src/java/org/apache/lucene/search/RangeTermEnum.java (revision 0) @@ -0,0 +1,123 @@ +package org.apache.lucene.search; + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +/** + * Subclass of FilteredTermEnum for enumerating all terms that match the + * specified 
range parameters. + *

+ * Term enumerations are always ordered by Term.compareTo(). Each term in + * the enumeration is greater than all that precede it. + */ +public class RangeTermEnum extends FilteredTermEnum { + + private Collator collator = null; + private boolean endEnum = false; + private String field; + private String upperTermText; + private String lowerTermText; + private boolean includeLower; + private boolean includeUpper; + + /** + * @param reader + * @param collator + * The collator to use to collate index Terms, to determine their + * membership in the range bounded by lowerTerm and + * upperTerm. + * @param field + * An interned field that holds both lower and upper terms. + * @param lowerTermText + * The term text at the lower end of the range + * @param upperTermText + * The term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is included in the range. + * @param includeUpper + * If true, the upperTerm is included in the range. + * + * @throws IOException + */ + public RangeTermEnum(IndexReader reader, Collator collator, String field, + String lowerTermText, String upperTermText, boolean includeLower, boolean includeUpper) throws IOException { + this.collator = collator; + this.upperTermText = upperTermText; + this.lowerTermText = lowerTermText; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.field = field; + + // do a little bit of normalization... + // open ended range queries should always be inclusive. 
+ if (this.lowerTermText == null) { + this.lowerTermText = ""; + this.includeLower = true; + } + + if (this.upperTermText == null) { + this.includeUpper = true; + } + + setEnum(reader.terms(new Term(this.field, this.lowerTermText))); + } + + public float difference() { + return 1.0f; + } + + protected boolean endEnum() { + return endEnum; + } + + protected boolean termCompare(Term term) { + if (collator == null) { + // Use Unicode code point ordering + boolean checkLower = false; + if (!includeLower) // make adjustments to set to exclusive + checkLower = true; + if (term != null && term.field() == field) { // interned comparison + if (!checkLower || null==lowerTermText || term.text().compareTo(lowerTermText) > 0) { + checkLower = false; + if (upperTermText != null) { + int compare = upperTermText.compareTo(term.text()); + /* + * if beyond the upper term, or is exclusive and this is equal to + * the upper term, break out + */ + if ((compare < 0) || + (!includeUpper && compare==0)) { + endEnum = true; + return false; + } + } + return true; + } + } else { + // break + endEnum = true; + return false; + } + return false; + } else { + if (term != null && term.field() == field) { // interned comparison + if ((lowerTermText == null + || (includeLower + ? collator.compare(term.text(), lowerTermText) >= 0 + : collator.compare(term.text(), lowerTermText) > 0)) + && (upperTermText == null + || (includeUpper + ? 
collator.compare(term.text(), upperTermText) <= 0 + : collator.compare(term.text(), upperTermText) < 0))) { + return true; + } + return false; + } + endEnum = true; + return false; + } + } +} Property changes on: src/java/org/apache/lucene/search/RangeTermEnum.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/search/RangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/RangeQuery.java (revision 712531) +++ src/java/org/apache/lucene/search/RangeQuery.java (working copy) @@ -21,234 +21,205 @@ import java.text.Collator; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.ToStringUtils; /** * A Query that matches documents within an exclusive range. A RangeQuery * is built by QueryParser for input like [010 TO 120] but only if the QueryParser has * the useOldRangeQuery property set to true. The QueryParser default behaviour is to use - * the newer ConstantScoreRangeQuery class. This is generally preferable because: + * the newer ConstantScore mode. This is generally preferable because: *

* - * - * @see ConstantScoreRangeQuery - * * * @version $Id$ */ -public class RangeQuery extends Query -{ - private Term lowerTerm; - private Term upperTerm; - private boolean inclusive; - private Collator collator; +public class RangeQuery extends MultiTermQuery { + private Term lowerTerm; + private Term upperTerm; + private Collator collator; + private String field; + private boolean includeLower; + private boolean includeUpper; - /** Constructs a query selecting all terms greater than - * lowerTerm but less than upperTerm. - * There must be at least one term and either term may be null, - * in which case there is no bound on that side, but if there are - * two terms, both terms must be for the same field. - * - * @param lowerTerm The Term at the lower end of the range - * @param upperTerm The Term at the upper end of the range - * @param inclusive If true, both lowerTerm and - * upperTerm will themselves be included in the range. - */ - public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive) - { - if (lowerTerm == null && upperTerm == null) - { - throw new IllegalArgumentException("At least one term must be non-null"); - } - if (lowerTerm != null && upperTerm != null && lowerTerm.field() != upperTerm.field()) - { - throw new IllegalArgumentException("Both terms must be for the same field"); - } - // if we have a lowerTerm, start there. otherwise, start at beginning - if (lowerTerm != null) { - this.lowerTerm = lowerTerm; - } - else { - this.lowerTerm = new Term(upperTerm.field()); - } + /** + * Constructs a query selecting all terms greater than lowerTerm + * but less than upperTerm. There must be at least one term and + * either term may be null, in which case there is no bound on that side. + * + * @param field The field that holds both lower and upper terms. 
+ * @param lowerTerm + * The term text at the lower end of the range + * @param upperTerm + * The term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is + * included in the range. + * @param includeUpper + * If true, the upperTerm is + * included in the range. + */ + public RangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + init(new Term(field, lowerTerm), new Term(field, upperTerm), includeLower, includeUpper, null); + } - this.upperTerm = upperTerm; - this.inclusive = inclusive; - } + /** Constructs a query selecting all terms greater than + * lowerTerm but less than upperTerm. + * There must be at least one term and either term may be null, + * in which case there is no bound on that side. + *

+ * If collator is not null, it will be used to decide whether + * index terms are within the given range, rather than using the Unicode code + * point order in which index terms are stored. + *

+ * WARNING: Using this constructor and supplying a non-null + * value in the collator parameter will cause every single + * index Term in the Field referenced by lowerTerm and/or upperTerm to be + * examined. Depending on the number of index Terms in this Field, the + * operation could be very slow. + * + * @param lowerTerm The Term text at the lower end of the range + * @param upperTerm The Term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is + * included in the range. + * @param includeUpper + * If true, the upperTerm is + * included in the range. + * @param collator The collator to use to collate index Terms, to determine + * their membership in the range bounded by lowerTerm and + * upperTerm. + */ + public RangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, + Collator collator) { + init(new Term(field, lowerTerm), new Term(field,upperTerm), includeLower, includeUpper, collator); + } - /** Constructs a query selecting all terms greater than - * lowerTerm but less than upperTerm. - * There must be at least one term and either term may be null, - * in which case there is no bound on that side, but if there are - * two terms, both terms must be for the same field. - *

- * If collator is not null, it will be used to decide whether - * index terms are within the given range, rather than using the Unicode code - * point order in which index terms are stored. - *

- * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The Term at the lower end of the range - * @param upperTerm The Term at the upper end of the range - * @param inclusive If true, both lowerTerm and - * upperTerm will themselves be included in the range. - * @param collator The collator to use to collate index Terms, to determine - * their membership in the range bounded by lowerTerm and - * upperTerm. - */ - public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive, - Collator collator) - { - this(lowerTerm, upperTerm, inclusive); - this.collator = collator; - } + /** @deprecated Please use {@link #RangeQuery(String, + * String, String, boolean, boolean, Collator)} instead */ + public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive, + Collator collator) { + init(lowerTerm, upperTerm, inclusive, inclusive, collator); + } + + /** @deprecated Please use {@link #RangeQuery(String, + * String, String, boolean, boolean)} instead */ + public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive) { + init(lowerTerm, upperTerm, inclusive, inclusive, null); + } - public Query rewrite(IndexReader reader) throws IOException { + private void init(Term lowerTerm, Term upperTerm, boolean includeLower, boolean includeUpper, Collator collator) { + if (lowerTerm == null && upperTerm == null) + throw new IllegalArgumentException("At least one term must be non-null"); + if (lowerTerm != null && upperTerm != null && lowerTerm.field() != upperTerm.field()) + throw new IllegalArgumentException("Both terms must be for the same field"); - BooleanQuery query = new BooleanQuery(true); - String testField = getField(); - if (collator != null) { - TermEnum enumerator = reader.terms(new 
Term(testField, "")); - String lowerTermText = lowerTerm != null ? lowerTerm.text() : null; - String upperTermText = upperTerm != null ? upperTerm.text() : null; + if (lowerTerm == null) + this.field = upperTerm.field(); + else + this.field = lowerTerm.field(); + this.lowerTerm = lowerTerm; + this.upperTerm = upperTerm; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.collator = collator; + } + + /** Returns the field name for this query */ + public String getField() { + return field; + } - try { - do { - Term term = enumerator.term(); - if (term != null && term.field() == testField) { // interned comparison - if ((lowerTermText == null - || (inclusive ? collator.compare(term.text(), lowerTermText) >= 0 - : collator.compare(term.text(), lowerTermText) > 0)) - && (upperTermText == null - || (inclusive ? collator.compare(term.text(), upperTermText) <= 0 - : collator.compare(term.text(), upperTermText) < 0))) { - addTermToQuery(term, query); - } - } - } - while (enumerator.next()); - } - finally { - enumerator.close(); - } - } - else { // collator is null - TermEnum enumerator = reader.terms(lowerTerm); + /** Returns the lower term of this range query. + * @deprecated Use {@link #getLowerTermText} instead. */ + public Term getLowerTerm() { return lowerTerm; } - try { + /** Returns the upper term of this range query. + * @deprecated Use {@link #getUpperTermText} instead. */ + public Term getUpperTerm() { return upperTerm; } + + /** Returns the lower value of this range query */ + public String getLowerTermText() { return lowerTerm == null ? null : lowerTerm.text(); } - boolean checkLower = false; - if (!inclusive) // make adjustments to set to exclusive - checkLower = true; + /** Returns the upper value of this range query */ + public String getUpperTermText() { return upperTerm == null ? 
null : upperTerm.text(); } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return includeLower; } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return includeUpper; } - do { - Term term = enumerator.term(); - if (term != null && term.field() == testField) { // interned comparison - if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) { - checkLower = false; - if (upperTerm != null) { - int compare = upperTerm.text().compareTo(term.text()); - /* if beyond the upper term, or is exclusive and - * this is equal to the upper term, break out */ - if ((compare < 0) || (!inclusive && compare == 0)) - break; - } - addTermToQuery(term, query); // Found a match - } - } - else { - break; - } - } - while (enumerator.next()); - } - finally { - enumerator.close(); - } - } - return query; - } + /** Returns true if the range query is inclusive + * @deprecated Use {@link #includesLower}, {@link #includesUpper} instead. + */ + public boolean isInclusive() { return includeUpper && includeLower; } - private void addTermToQuery(Term term, BooleanQuery query) { - TermQuery tq = new TermQuery(term); - tq.setBoost(getBoost()); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - } + /** Returns the collator used to determine range inclusion, if any. */ + public Collator getCollator() { return collator; } + + protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + return new RangeTermEnum(reader, collator, getField(), lowerTerm.text(), + upperTerm.text(), includeLower, includeUpper); + } - /** Returns the field name for this query */ - public String getField() { - return (lowerTerm != null ? lowerTerm.field() : upperTerm.field()); - } + /** Prints a user-readable version of this query. 
*/ + public String toString(String field) { + StringBuffer buffer = new StringBuffer(); + if (!getField().equals(field)) { + buffer.append(getField()); + buffer.append(":"); + } + buffer.append(includeLower ? '[' : '{'); + buffer.append(lowerTerm != null ? lowerTerm.text() : "*"); + buffer.append(" TO "); + buffer.append(upperTerm != null ? upperTerm.text() : "*"); + buffer.append(includeUpper ? ']' : '}'); + if (getBoost() != 1.0f) { + buffer.append("^"); + buffer.append(Float.toString(getBoost())); + } + return buffer.toString(); + } - /** Returns the lower term of this range query */ - public Term getLowerTerm() { return lowerTerm; } + /** Returns true iff o is equal to this. */ + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof RangeQuery)) return false; + RangeQuery other = (RangeQuery) o; - /** Returns the upper term of this range query */ - public Term getUpperTerm() { return upperTerm; } + if (this.field != other.field // interned comparison + || this.includeLower != other.includeLower + || this.includeUpper != other.includeUpper + || (this.collator != null && ! this.collator.equals(other.collator)) + ) { return false; } + String lowerVal = this.lowerTerm == null ? null : lowerTerm.text(); + String upperVal = this.upperTerm == null ? null : upperTerm.text(); + String olowerText = other.lowerTerm == null ? null : other.lowerTerm.text(); + String oupperText = other.upperTerm == null ? null : other.upperTerm.text(); + if (lowerVal != null ? !lowerVal.equals(olowerText) : olowerText != null) return false; + if (upperVal != null ? !upperVal.equals(oupperText) : oupperText != null) return false; + return this.getBoost() == other.getBoost(); + } - /** Returns true if the range query is inclusive */ - public boolean isInclusive() { return inclusive; } - - /** Returns the collator used to determine range inclusion, if any. 
*/ - public Collator getCollator() { return collator; } - - - /** Prints a user-readable version of this query. */ - public String toString(String field) - { - StringBuffer buffer = new StringBuffer(); - if (!getField().equals(field)) - { - buffer.append(getField()); - buffer.append(":"); - } - buffer.append(inclusive ? "[" : "{"); - buffer.append(lowerTerm != null ? lowerTerm.text() : "null"); - buffer.append(" TO "); - buffer.append(upperTerm != null ? upperTerm.text() : "null"); - buffer.append(inclusive ? "]" : "}"); - buffer.append(ToStringUtils.boost(getBoost())); - return buffer.toString(); - } - - /** Returns true iff o is equal to this. */ - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof RangeQuery)) return false; - - final RangeQuery other = (RangeQuery) o; - if (this.getBoost() != other.getBoost()) return false; - if (this.inclusive != other.inclusive) return false; - if (this.collator != null && ! this.collator.equals(other.collator)) - return false; - - // one of lowerTerm and upperTerm can be null - if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false; - if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false; - return true; - } - - /** Returns a hash code value for this object.*/ - public int hashCode() { - int h = Float.floatToIntBits(getBoost()); - h ^= lowerTerm != null ? lowerTerm.hashCode() : 0; - // reversible mix to make lower and upper position dependent and - // to prevent them from cancelling out. - h ^= (h << 25) | (h >>> 8); - h ^= upperTerm != null ? upperTerm.hashCode() : 0; - h ^= this.inclusive ? 0x2742E74A : 0; - h ^= collator != null ? collator.hashCode() : 0; - return h; - } + /** Returns a hash code value for this object.*/ + public int hashCode() { + int h = Float.floatToIntBits(getBoost()) ^ field.hashCode(); + String lowerVal = this.lowerTerm == null ? 
null : lowerTerm.text(); + String upperVal = this.upperTerm == null ? null : upperTerm.text(); + // hashCode of "" is 0, so don't use that for null... + h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a; + // don't just XOR upperVal without mixing either it or h, as it will cancel + // out lowerVal if they are equal. + h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix + h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69); + h ^= (includeLower ? 0x665599aa : 0) + ^ (includeUpper ? 0x99aa5566 : 0); + h ^= collator != null ? collator.hashCode() : 0; + return h; + } } Index: src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- src/java/org/apache/lucene/search/PrefixQuery.java (revision 712531) +++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -19,48 +19,33 @@ import java.io.IOException; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing terms with a specified prefix. A PrefixQuery * is built by QueryParser for input like app*. */ -public class PrefixQuery extends Query { +public class PrefixQuery extends MultiTermQuery { private Term prefix; /** Constructs a query for terms starting with prefix. */ public PrefixQuery(Term prefix) { + super(prefix); this.prefix = prefix; } /** Returns the prefix of this query.
*/ public Term getPrefix() { return prefix; } + + protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + return new PrefixTermEnum(reader, getTerm()); + } - public Query rewrite(IndexReader reader) throws IOException { - BooleanQuery query = new BooleanQuery(true); - TermEnum enumerator = reader.terms(prefix); - try { - String prefixText = prefix.text(); - String prefixField = prefix.field(); - do { - Term term = enumerator.term(); - if (term != null && - term.text().startsWith(prefixText) && - term.field() == prefixField) // interned comparison - { - TermQuery tq = new TermQuery(term); // found a match - tq.setBoost(getBoost()); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - //System.out.println("added " + term); - } else { - break; - } - } while (enumerator.next()); - } finally { - enumerator.close(); - } - return query; + public boolean equals(Object o) { + if (o instanceof PrefixQuery) + return super.equals(o); + + return false; } /** Prints a user-readable version of this query. */ @@ -75,18 +60,4 @@ buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } - - /** Returns true iff o is equal to this. */ - public boolean equals(Object o) { - if (!(o instanceof PrefixQuery)) - return false; - PrefixQuery other = (PrefixQuery)o; - return (this.getBoost() == other.getBoost()) - && this.prefix.equals(other.prefix); - } - - /** Returns a hash code value for this object.*/ - public int hashCode() { - return Float.floatToIntBits(getBoost()) ^ prefix.hashCode() ^ 0x6634D93C; - } } Index: src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (revision 712531) +++ src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (working copy) @@ -17,9 +17,6 @@ * limitations under the License. 
*/ -import org.apache.lucene.index.IndexReader; - -import java.io.IOException; import java.text.Collator; /** @@ -32,121 +29,31 @@ * Either or both endpoints may be open. Open endpoints may not be exclusive * (you can't select all but the first or last term without explicitly specifying the term to exclude.) * - * + * @deprecated Please use {@link RangeQuery}, and call + * {@link RangeQuery#setConstantScoreRewrite}, instead. * @version $Id$ */ - -public class ConstantScoreRangeQuery extends Query +public class ConstantScoreRangeQuery extends RangeQuery { - private final String fieldName; - private final String lowerVal; - private final String upperVal; - private final boolean includeLower; - private final boolean includeUpper; - private Collator collator; - public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { - // do a little bit of normalization... - // open ended range queries should always be inclusive. - if (lowerVal==null) { - includeLower=true; - } else if (includeLower && lowerVal.equals("")) { - lowerVal=null; - } - if (upperVal==null) { - includeUpper=true; - } - - - this.fieldName = fieldName.intern(); // intern it, just like terms... 
- this.lowerVal = lowerVal; - this.upperVal = upperVal; - this.includeLower = includeLower; - this.includeUpper = includeUpper; + super(fieldName, lowerVal, upperVal, includeLower, includeUpper); + this.constantScoreRewrite = true; } public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, - boolean includeUpper, Collator collator) - { - this(fieldName, lowerVal, upperVal, includeLower, includeUpper); - this.collator = collator; + boolean includeUpper, Collator collator) { + super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator); + this.constantScoreRewrite = true; } - /** Returns the field name for this query */ - public String getField() { return fieldName; } - /** Returns the value of the lower endpoint of this range query, null if open ended */ - public String getLowerVal() { return lowerVal; } - /** Returns the value of the upper endpoint of this range query, null if open ended */ - public String getUpperVal() { return upperVal; } - /** Returns true if the lower endpoint is inclusive */ - public boolean includesLower() { return includeLower; } - /** Returns true if the upper endpoint is inclusive */ - public boolean includesUpper() { return includeUpper; } - - public Query rewrite(IndexReader reader) throws IOException { - // Map to RangeFilter semantics which are slightly different... - RangeFilter rangeFilt = new RangeFilter - (fieldName, lowerVal != null?lowerVal:"", upperVal, - lowerVal==""?false:includeLower, upperVal==null?false:includeUpper, - collator); - Query q = new ConstantScoreQuery(rangeFilt); - q.setBoost(getBoost()); - return q; + public String getLowerVal() { + return getLowerTermText(); } - /** Prints a user-readable version of this query. */ - public String toString(String field) - { - StringBuffer buffer = new StringBuffer(); - if (!getField().equals(field)) - { - buffer.append(getField()); - buffer.append(":"); - } - buffer.append(includeLower ? 
'[' : '{'); - buffer.append(lowerVal != null ? lowerVal : "*"); - buffer.append(" TO "); - buffer.append(upperVal != null ? upperVal : "*"); - buffer.append(includeUpper ? ']' : '}'); - if (getBoost() != 1.0f) - { - buffer.append("^"); - buffer.append(Float.toString(getBoost())); - } - return buffer.toString(); - } - - /** Returns true if o is equal to this. */ - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof ConstantScoreRangeQuery)) return false; - ConstantScoreRangeQuery other = (ConstantScoreRangeQuery) o; - - if (this.fieldName != other.fieldName // interned comparison - || this.includeLower != other.includeLower - || this.includeUpper != other.includeUpper - || (this.collator != null && ! this.collator.equals(other.collator)) - ) { return false; } - if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false; - if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false; - return this.getBoost() == other.getBoost(); - } - - /** Returns a hash code value for this object.*/ - public int hashCode() { - int h = Float.floatToIntBits(getBoost()) ^ fieldName.hashCode(); - // hashCode of "" is 0, so don't use that for null... - h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a; - // don't just XOR upperVal with out mixing either it or h, as it will cancel - // out lowerVal if they are equal. - h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix - h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69); - h ^= (includeLower ? 0x665599aa : 0) - ^ (includeUpper ? 0x99aa5566 : 0); - h ^= collator != null ? 
collator.hashCode() : 0; - return h; - } + public String getUpperVal() { + return getUpperTermText(); + } } Index: src/java/org/apache/lucene/search/PrefixFilter.java =================================================================== --- src/java/org/apache/lucene/search/PrefixFilter.java (revision 712531) +++ src/java/org/apache/lucene/search/PrefixFilter.java (working copy) @@ -17,25 +17,28 @@ * limitations under the License. */ -import org.apache.lucene.search.Filter; -import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.TermDocs; - -import java.util.BitSet; import java.io.IOException; +import java.util.BitSet; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + /** - * - * @version $Id$ + * A Filter that restricts search results to values that have a matching prefix in a given + * field. + * + *

+ * This filter delegates to the filter obtained from a {@link PrefixQuery} built on the same prefix. + * + *

*/ public class PrefixFilter extends Filter { protected final Term prefix; + private PrefixQuery prefixQuery; public PrefixFilter(Term prefix) { this.prefix = prefix; + this.prefixQuery = new PrefixQuery(prefix); } public Term getPrefix() { return prefix; } @@ -44,23 +47,11 @@ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. */ public BitSet bits(IndexReader reader) throws IOException { - final BitSet bitSet = new BitSet(reader.maxDoc()); - new PrefixGenerator(prefix) { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; + return prefixQuery.getFilter().bits(reader); } public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - new PrefixGenerator(prefix) { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; + return prefixQuery.getFilter().getDocIdSet(reader); } /** Prints a user-readable version of this query. */ @@ -71,52 +62,8 @@ buffer.append(")"); return buffer.toString(); } -} -// keep this protected until I decide if it's a good way -// to separate id generation from collection (or should -// I just reuse hitcollector???) 
-interface IdGenerator { - public void generate(IndexReader reader) throws IOException; - public void handleDoc(int doc); } -abstract class PrefixGenerator implements IdGenerator { - protected final Term prefix; - PrefixGenerator(Term prefix) { - this.prefix = prefix; - } - - public void generate(IndexReader reader) throws IOException { - TermEnum enumerator = reader.terms(prefix); - TermDocs termDocs = reader.termDocs(); - - try { - - String prefixText = prefix.text(); - String prefixField = prefix.field(); - do { - Term term = enumerator.term(); - if (term != null && - term.text().startsWith(prefixText) && - term.field() == prefixField) // interned comparison - { - termDocs.seek(term); - while (termDocs.next()) { - handleDoc(termDocs.doc()); - } - } else { - break; - } - } while (enumerator.next()); - } finally { - termDocs.close(); - enumerator.close(); - } - } -} - - - Index: src/java/org/apache/lucene/search/WildcardTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/WildcardTermEnum.java (revision 712531) +++ src/java/org/apache/lucene/search/WildcardTermEnum.java (working copy) @@ -40,9 +40,7 @@ boolean endEnum = false; /** - * Creates a new WildcardTermEnum. Passing in a - * {@link org.apache.lucene.index.Term Term} that does not contain a - * WILDCARD_CHAR will cause an exception to be thrown. + * Creates a new WildcardTermEnum. *

* After calling the constructor the enumeration is already pointing to the first * valid term if such a term exists. @@ -62,8 +60,12 @@ else if (cidx >= 0) { idx = Math.min(idx, cidx); } + if (idx != -1) { + pre = searchTerm.text().substring(0,idx); + } else { + pre = ""; + } - pre = searchTerm.text().substring(0,idx); preLen = pre.length(); text = text.substring(preLen); setEnum(reader.terms(new Term(searchTerm.field(), pre))); Index: src/java/org/apache/lucene/search/WildcardQuery.java =================================================================== --- src/java/org/apache/lucene/search/WildcardQuery.java (revision 712531) +++ src/java/org/apache/lucene/search/WildcardQuery.java (working copy) @@ -48,12 +48,11 @@ return false; } - + public Query rewrite(IndexReader reader) throws IOException { - if (this.termContainsWildcard) { - return super.rewrite(reader); - } - + if (!termContainsWildcard) return new TermQuery(getTerm()); + else + return super.rewrite(reader); } } Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 712531) +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -44,7 +44,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; @@ -52,6 +51,7 @@ import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FilteredQuery; import 
org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; @@ -462,14 +462,8 @@ public void testGetConstantScoreRangeFragments() throws Exception { numHighlights = 0; - String queryString = FIELD_NAME + ":[kannedy TO kznnedy]"; - // Need to explicitly set the QueryParser property to use RangeQuery - // rather - // than RangeFilters - QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); - // parser.setUseOldRangeQuery(true); - query = parser.parse(queryString); + query = new ConstantScoreRangeQuery(FIELD_NAME, "kannedy", "kznnedy", true, true); searcher = new IndexSearcher(ramDir); // can't rewrite ConstantScoreRangeQuery if you want to highlight it - @@ -1043,6 +1037,7 @@ searchers[1] = new IndexSearcher(ramDir2); MultiSearcher multiSearcher = new MultiSearcher(searchers); QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); + parser.setConstantScoreRewrite(false); query = parser.parse("multi*"); System.out.println("Searching for: " + query.toString(FIELD_NAME)); // at this point the multisearcher calls combine(query[]) @@ -1343,6 +1338,7 @@ public void doSearching(String queryString) throws Exception { QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); + parser.setConstantScoreRewrite(false); query = parser.parse(queryString); doSearching(query); }