Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 712531) +++ CHANGES.txt (working copy) @@ -5,6 +5,12 @@ Changes in runtime behavior + 1. LUCENE-1424: QueryParser now by default uses constant score query + rewriting when it generates a WildcardQuery and PrefixQuery (it + already does so for RangeQuery, as well). Call + setConstantScoreRewrite(false) to revert to BooleanQuery rewriting + method. (Mark Miller via Mike McCandless) + API Changes 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is @@ -50,6 +56,13 @@ 5. Added web-based demo of functionality in contrib's XML Query Parser packaged as War file (Mark Harwood) + 6. LUCENE-1424: Moved constant score query rewrite capability into + MultiTermQuery, allowing RangeQuery, PrefixQuery and WildcardQuery + to switch between constant-score rewriting and BooleanQuery + expansion rewriting via a new setConstantScoreRewrite method. + Deprecated ConstantScoreRangeQuery (Mark Miller via Mike + McCandless) + Optimizations 1. 
LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 712531) +++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -43,7 +43,6 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -415,11 +414,11 @@ public void testRange() throws Exception { assertQueryEquals("[ a TO z]", null, "[a TO z]"); - assertTrue(getQuery("[ a TO z]", null) instanceof ConstantScoreRangeQuery); + assertTrue(((RangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite()); QueryParser qp = new QueryParser("field", new SimpleAnalyzer()); - qp.setUseOldRangeQuery(true); - assertTrue(qp.parse("[ a TO z]") instanceof RangeQuery); + qp.setConstantScoreRewrite(false); + assertFalse(((RangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite()); assertQueryEquals("[ a TO z ]", null, "[a TO z]"); assertQueryEquals("{ a TO z}", null, "{a TO z}"); @@ -458,7 +457,7 @@ // supported). 
// Test ConstantScoreRangeQuery - qp.setUseOldRangeQuery(false); + qp.setConstantScoreRewrite(true); ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); @@ -466,7 +465,7 @@ assertEquals("The index Term should be included.", 1, result.length); // Test RangeQuery - qp.setUseOldRangeQuery(true); + qp.setConstantScoreRewrite(false); result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); @@ -775,7 +774,7 @@ public void assertParseException(String queryString) throws Exception { try { - Query q = getQuery(queryString, null); + getQuery(queryString, null); } catch (ParseException expected) { return; } Index: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 0) +++ src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 0) @@ -0,0 +1,567 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; + +import java.io.IOException; +import java.text.Collator; +import java.util.Locale; + +import junit.framework.Assert; + +public class TestMultiTermConstantScore extends BaseTestRangeFilter { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 1e-6f; + + public TestMultiTermConstantScore(String name) { + super(name); + } + + public TestMultiTermConstantScore() { + super(); + } + + Directory small; + + void assertEquals(String m, float e, float a) { + assertEquals(m, e, a, SCORE_COMP_THRESH); + } + + static public void assertEquals(String m, int e, int a) { + Assert.assertEquals(m, e, a); + } + + public void setUp() throws Exception { + super.setUp(); + + String[] data = new String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null, + "B 2 4 5 6", "Y 3 5 6", null, "C 3 6", + "X 4 5 6" }; + + small = new RAMDirectory(); + IndexWriter writer = new IndexWriter(small, new WhitespaceAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); + + for (int i = 0; i < data.length; i++) { + Document doc = new Document(); + doc.add(new Field("id", String.valueOf(i), Field.Store.YES, + Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i))); + doc + .add(new Field("all", "all", Field.Store.YES, + Field.Index.NOT_ANALYZED));// Field.Keyword("all","all")); + if (null != data[i]) { + doc.add(new Field("data", data[i], Field.Store.YES, + Field.Index.ANALYZED));// Field.Text("data",data[i])); + } + writer.addDocument(doc); + } + + writer.optimize(); + writer.close(); + } + + /** macro for readability */ + public static Query csrq(String f, 
String l, String h, boolean il, boolean ih) { + RangeQuery query = new RangeQuery(f, l, h, il, ih); + query.setConstantScoreRewrite(true); + return query; + } + + /** macro for readability */ + public static Query csrq(String f, String l, String h, boolean il, + boolean ih, Collator c) { + RangeQuery query = new RangeQuery(f, l, h, il, ih, c); + query.setConstantScoreRewrite(true); + return query; + } + + /** macro for readability */ + public static Query cspq(Term prefix) { + PrefixQuery query = new PrefixQuery(prefix); + query.setConstantScoreRewrite(true); + return query; + } + + /** macro for readability */ + public static Query cswcq(Term wild) { + WildcardQuery query = new WildcardQuery(wild); + query.setConstantScoreRewrite(true); + return query; + } + + public void testBasics() throws IOException { + QueryUtils.check(csrq("data", "1", "6", T, T)); + QueryUtils.check(csrq("data", "A", "Z", T, T)); + QueryUtils.checkUnequal(csrq("data", "1", "6", T, T), csrq("data", "A", + "Z", T, T)); + + QueryUtils.check(cspq(new Term("data", "p*u?"))); + QueryUtils.checkUnequal(cspq(new Term("data", "pre*")), cspq(new Term( + "data", "pres*"))); + + QueryUtils.check(cswcq(new Term("data", "p"))); + QueryUtils.checkUnequal(cswcq(new Term("data", "pre*n?t")), cswcq(new Term( + "data", "pr*t?j"))); + } + + public void testBasicsRngCollating() throws IOException { + Collator c = Collator.getInstance(Locale.ENGLISH); + QueryUtils.check(csrq("data", "1", "6", T, T, c)); + QueryUtils.check(csrq("data", "A", "Z", T, T, c)); + QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A", + "Z", T, T, c)); + } + + public void testEqualScores() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + ScoreDoc[] result; + + // some hits match more terms then others, score should be the same + + result = search.search(csrq("data", "1", "6", T, T), null, 
1000).scoreDocs; + int numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + float score = result[0].score; + for (int i = 1; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score); + } + + } + + public void testBoost() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + // test for correct application of query normalization + // must use a non score normalizing method for this. + Query q = csrq("data", "1", "6", T, T); + q.setBoost(100); + search.search(q, null, new HitCollector() { + public void collect(int doc, float score) { + assertEquals("score for doc " + doc + " was not correct", 1.0f, score); + } + }); + + // + // Ensure that boosting works to score one clause of a query higher + // than another. + // + Query q1 = csrq("data", "A", "A", T, T); // matches document #0 + q1.setBoost(.1f); + Query q2 = csrq("data", "Z", "Z", T, T); // matches document #1 + BooleanQuery bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs; + assertEquals(1, hits[0].doc); + assertEquals(0, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + + q1 = csrq("data", "A", "A", T, T); // matches document #0 + q1.setBoost(10f); + q2 = csrq("data", "Z", "Z", T, T); // matches document #1 + bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + hits = search.search(bq, null, 1000).scoreDocs; + assertEquals(0, hits[0].doc); + assertEquals(1, hits[1].doc); + assertTrue(hits[0].score > hits[1].score); + } + + public void testBooleanOrderUnAffected() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); 
+ + // first do a regular RangeQuery which uses term expansion so + // docs with more terms in range get higher scores + + Query rq = new RangeQuery(new Term("data", "1"), new Term("data", "4"), T); + + ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; + int numHits = expected.length; + + // now do a boolean where which also contains a + // ConstantScoreRangeQuery and make sure hte order is the same + + BooleanQuery q = new BooleanQuery(); + q.add(rq, BooleanClause.Occur.MUST);// T, F); + q.add(csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST);// T, F); + + ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs; + + assertEquals("wrong numebr of hits", numHits, actual.length); + for (int i = 0; i < numHits; i++) { + assertEquals("mismatch in docid for hit#" + i, expected[i].doc, + actual[i].doc); + } + + } + + public void testRangeQueryId() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + // test id, bounded on both ends + + result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs; 
+ assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(csrq("id", null, maxIP, F, T), null, numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = search.search(csrq("id", minIP, null, F, F), null, numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(csrq("id", null, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, F), null, numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, F, T), null, numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs; + assertEquals("med,med,F,F", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F), null, 
numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs; + assertEquals("med,med,T,T", 1, result.length); + + } + + public void testRangeQueryIdCollating() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + Collator c = Collator.getInstance(Locale.ENGLISH); + + // test id, bounded on both ends + + result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs; + assertEquals("all but last", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("all but first", numDocs - 1, result.length); + + result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("all but ends", numDocs - 2, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("med and up", 1 + maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("up to med", 1 + medId - minId, result.length); + + // unbounded id + + result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("min and up", numDocs, result.length); + + result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("max and down", numDocs, result.length); + + result = 
search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs; + assertEquals("not min, but up", numDocs - 1, result.length); + + result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("not max, but down", numDocs - 1, result.length); + + result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs; + assertEquals("med and up, not max", maxId - medId, result.length); + + result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("not min, up to med", medId - minId, result.length); + + // very small sets + + result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("min,min,F,F,c", 0, result.length); + result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("med,med,F,F,c", 0, result.length); + result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs; + assertEquals("max,max,F,F,c", 0, result.length); + + result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("min,min,T,T,c", 1, result.length); + result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T,c", 1, result.length); + + result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("max,max,T,T,c", 1, result.length); + result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T,c", 1, result.length); + + result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; + assertEquals("med,med,T,T,c", 1, result.length); + } + + public void testRangeQueryRand() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(signedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + String minRP = pad(signedIndex.minR); + String 
maxRP = pad(signedIndex.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + // test extremes, bounded on both ends + + result = search.search(csrq("rand", minRP, maxRP, T, T), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("rand", minRP, maxRP, T, F), null, numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, T), null, numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(csrq("rand", minRP, null, T, F), null, numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(csrq("rand", null, maxRP, F, T), null, numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(csrq("rand", minRP, null, F, F), null, numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(csrq("rand", null, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(csrq("rand", minRP, minRP, F, F), null, numDocs).scoreDocs; + assertEquals("min,min,F,F", 0, result.length); + result = search.search(csrq("rand", maxRP, maxRP, F, F), null, numDocs).scoreDocs; + assertEquals("max,max,F,F", 0, result.length); + + result = search.search(csrq("rand", minRP, minRP, T, T), null, numDocs).scoreDocs; + assertEquals("min,min,T,T", 1, result.length); + result = search.search(csrq("rand", null, minRP, F, T), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T", 1, result.length); + + result = 
search.search(csrq("rand", maxRP, maxRP, T, T), null, numDocs).scoreDocs; + assertEquals("max,max,T,T", 1, result.length); + result = search.search(csrq("rand", maxRP, null, T, F), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T", 1, result.length); + + } + + public void testRangeQueryRandCollating() throws IOException { + // NOTE: uses index build in *super* setUp + + // using the unsigned index because collation seems to ignore hyphens + IndexReader reader = IndexReader.open(unsignedIndex.index); + IndexSearcher search = new IndexSearcher(reader); + + String minRP = pad(unsignedIndex.minR); + String maxRP = pad(unsignedIndex.maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1 + maxId - minId); + + ScoreDoc[] result; + + Collator c = Collator.getInstance(Locale.ENGLISH); + + // test extremes, bounded on both ends + + result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("find all", numDocs, result.length); + + result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs; + assertEquals("all but biggest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("all but smallest", numDocs - 1, result.length); + + result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("all but extremes", numDocs - 2, result.length); + + // unbounded + + result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("smallest and up", numDocs, result.length); + + result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("biggest and down", numDocs, result.length); + + result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs; + assertEquals("not smallest, but up", numDocs - 1, result.length); + + result = search.search(csrq("rand", null, maxRP, F, F, 
c), null, numDocs).scoreDocs; + assertEquals("not biggest, but down", numDocs - 1, result.length); + + // very small sets + + result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("min,min,F,F,c", 0, result.length); + result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs; + assertEquals("max,max,F,F,c", 0, result.length); + + result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("min,min,T,T,c", 1, result.length); + result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs; + assertEquals("nul,min,F,T,c", 1, result.length); + + result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs; + assertEquals("max,max,T,T,c", 1, result.length); + result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; + assertEquals("max,nul,T,T,c", 1, result.length); + } + + public void testFarsi() throws Exception { + + /* build an index */ + RAMDirectory farsiIndex = new RAMDirectory(); + IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, + IndexWriter.MaxFieldLength.LIMITED); + Document doc = new Document(); + doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + doc + .add(new Field("body", "body", Field.Store.YES, + Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + + writer.optimize(); + writer.close(); + + IndexReader reader = IndexReader.open(farsiIndex); + IndexSearcher search = new IndexSearcher(reader); + + // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in + // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi + // characters properly. 
+ Collator c = Collator.getInstance(new Locale("ar")); + + // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi + // orders the U+0698 character before the U+0633 character, so the single + // index Term below should NOT be returned by a ConstantScoreRangeQuery + // with a Farsi Collator (or an Arabic one for the case when Farsi is + // not supported). + ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T, + c), null, 1000).scoreDocs; + assertEquals("The index Term should not be included.", 0, result.length); + + result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, + 1000).scoreDocs; + assertEquals("The index Term should be included.", 1, result.length); + search.close(); + } +} Property changes on: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/test/org/apache/lucene/search/TestRangeQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestRangeQuery.java (revision 712531) +++ src/test/org/apache/lucene/search/TestRangeQuery.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; @@ -41,9 +40,7 @@ } public void testExclusive() throws Exception { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - false); + Query query = new RangeQuery("content", "A", "C", false, false); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -64,9 +61,7 @@ } public void testInclusive() throws Exception { - Query query = new RangeQuery(new 
Term("content", "A"), - new Term("content", "C"), - true); + Query query = new RangeQuery("content", "A", "C", true, true); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); @@ -88,13 +83,10 @@ } public void testEqualsHashcode() { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - true); + Query query = new RangeQuery("content", "A", "C", true, true); + query.setBoost(1.0f); - Query other = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - true); + Query other = new RangeQuery("content", "A", "C", true, true); other.setBoost(1.0f); assertEquals("query equals itself is true", query, query); @@ -104,38 +96,36 @@ other.setBoost(2.0f); assertFalse("Different boost queries are not equal", query.equals(other)); - other = new RangeQuery(new Term("notcontent", "A"), new Term("notcontent", "C"), true); + other = new RangeQuery("notcontent", "A", "C", true, true); assertFalse("Different fields are not equal", query.equals(other)); - other = new RangeQuery(new Term("content", "X"), new Term("content", "C"), true); + other = new RangeQuery("content", "X", "C", true, true); assertFalse("Different lower terms are not equal", query.equals(other)); - other = new RangeQuery(new Term("content", "A"), new Term("content", "Z"), true); + other = new RangeQuery("content", "A", "Z", true, true); assertFalse("Different upper terms are not equal", query.equals(other)); - query = new RangeQuery(null, new Term("content", "C"), true); - other = new RangeQuery(null, new Term("content", "C"), true); + query = new RangeQuery("content", null, "C", true, true); + other = new RangeQuery("content", null, "C", true, true); assertEquals("equivalent queries with null lowerterms are equal()", query, other); assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode()); - query = new RangeQuery(new Term("content", "C"), null, true); - other = new 
RangeQuery(new Term("content", "C"), null, true); + query = new RangeQuery("content", "C", null, true, true); + other = new RangeQuery("content", "C", null, true, true); assertEquals("equivalent queries with null upperterms are equal()", query, other); assertEquals("hashcode returns same value", query.hashCode(), other.hashCode()); - query = new RangeQuery(null, new Term("content", "C"), true); - other = new RangeQuery(new Term("content", "C"), null, true); + query = new RangeQuery("content", null, "C", true, true); + other = new RangeQuery("content", "C", null, true, true); assertFalse("queries with different upper and lower terms are not equal", query.equals(other)); - query = new RangeQuery(new Term("content", "A"), new Term("content", "C"), false); - other = new RangeQuery(new Term("content", "A"), new Term("content", "C"), true); + query = new RangeQuery("content", "A", "C", false, false); + other = new RangeQuery("content", "A", "C", true, true); assertFalse("queries with different inclusive are not equal", query.equals(other)); } public void testExclusiveCollating() throws Exception { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - false, Collator.getInstance(Locale.ENGLISH)); + Query query = new RangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -156,9 +146,7 @@ } public void testInclusiveCollating() throws Exception { - Query query = new RangeQuery(new Term("content", "A"), - new Term("content", "C"), - true, Collator.getInstance(Locale.ENGLISH)); + Query query = new RangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir); @@ -184,9 +172,7 @@ // RuleBasedCollator. 
However, the Arabic Locale seems to order the Farsi // characters properly. Collator collator = Collator.getInstance(new Locale("ar")); - Query query = new RangeQuery(new Term("content", "\u062F"), - new Term("content", "\u0698"), - true, collator); + Query query = new RangeQuery("content", "\u062F", "\u0698", true, true, collator); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a RangeQuery with a Farsi @@ -196,9 +182,7 @@ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); - query = new RangeQuery(new Term("content", "\u0633"), - new Term("content", "\u0638"), - true, collator); + query = new RangeQuery("content", "\u0633", "\u0638",true, true, collator); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); searcher.close(); Index: src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java (revision 712531) +++ src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java (working copy) @@ -1,550 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; - -import java.io.IOException; -import java.text.Collator; -import java.util.Locale; - -import junit.framework.Assert; - -public class TestConstantScoreRangeQuery extends BaseTestRangeFilter { - - /** threshold for comparing floats */ - public static final float SCORE_COMP_THRESH = 1e-6f; - - public TestConstantScoreRangeQuery(String name) { - super(name); - } - public TestConstantScoreRangeQuery() { - super(); - } - - Directory small; - - void assertEquals(String m, float e, float a) { - assertEquals(m, e, a, SCORE_COMP_THRESH); - } - - static public void assertEquals(String m, int e, int a) { - Assert.assertEquals(m, e, a); - } - - public void setUp() throws Exception { - super.setUp(); - - String[] data = new String [] { - "A 1 2 3 4 5 6", - "Z 4 5 6", - null, - "B 2 4 5 6", - "Y 3 5 6", - null, - "C 3 6", - "X 4 5 6" - }; - - small = new RAMDirectory(); - IndexWriter writer = new IndexWriter(small, new WhitespaceAnalyzer(), true, - IndexWriter.MaxFieldLength.LIMITED); - - for (int i = 0; i < data.length; i++) { - Document doc = new Document(); - doc.add(new Field("id", String.valueOf(i), Field.Store.YES, 
Field.Index.NOT_ANALYZED));//Field.Keyword("id",String.valueOf(i))); - doc.add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));//Field.Keyword("all","all")); - if (null != data[i]) { - doc.add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED));//Field.Text("data",data[i])); - } - writer.addDocument(doc); - } - - writer.optimize(); - writer.close(); - } - - - - /** macro for readability */ - public static Query csrq(String f, String l, String h, - boolean il, boolean ih) { - return new ConstantScoreRangeQuery(f,l,h,il,ih); - } - - /** macro for readability */ - public static Query csrq(String f, String l, String h, - boolean il, boolean ih, Collator c) { - return new ConstantScoreRangeQuery(f,l,h,il,ih,c); - } - - public void testBasics() throws IOException { - QueryUtils.check(csrq("data","1","6",T,T)); - QueryUtils.check(csrq("data","A","Z",T,T)); - QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T)); - } - - public void testBasicsCollating() throws IOException { - Collator c = Collator.getInstance(Locale.ENGLISH); - QueryUtils.check(csrq("data","1","6",T,T,c)); - QueryUtils.check(csrq("data","A","Z",T,T,c)); - QueryUtils.checkUnequal(csrq("data","1","6",T,T,c), csrq("data","A","Z",T,T,c)); - } - - public void testEqualScores() throws IOException { - // NOTE: uses index build in *this* setUp - - IndexReader reader = IndexReader.open(small); - IndexSearcher search = new IndexSearcher(reader); - - ScoreDoc[] result; - - // some hits match more terms then others, score should be the same - - result = search.search(csrq("data","1","6",T,T), null, 1000).scoreDocs; - int numHits = result.length; - assertEquals("wrong number of results", 6, numHits); - float score = result[0].score; - for (int i = 1; i < numHits; i++) { - assertEquals("score for " + i +" was not the same", - score, result[i].score); - } - - } - - public void testBoost() throws IOException { - // NOTE: uses index build in *this* setUp - - 
IndexReader reader = IndexReader.open(small); - IndexSearcher search = new IndexSearcher(reader); - - // test for correct application of query normalization - // must use a non score normalizing method for this. - Query q = csrq("data","1","6",T,T); - q.setBoost(100); - search.search(q,null, new HitCollector() { - public void collect(int doc, float score) { - assertEquals("score for doc " + doc +" was not correct", - 1.0f, score); - } - }); - - - // - // Ensure that boosting works to score one clause of a query higher - // than another. - // - Query q1 = csrq("data","A","A",T,T); // matches document #0 - q1.setBoost(.1f); - Query q2 = csrq("data","Z","Z",T,T); // matches document #1 - BooleanQuery bq = new BooleanQuery(true); - bq.add(q1, BooleanClause.Occur.SHOULD); - bq.add(q2, BooleanClause.Occur.SHOULD); - - ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs; - assertEquals(1, hits[0].doc); - assertEquals(0, hits[1].doc); - assertTrue(hits[0].score > hits[1].score); - - q1 = csrq("data","A","A",T,T); // matches document #0 - q1.setBoost(10f); - q2 = csrq("data","Z","Z",T,T); // matches document #1 - bq = new BooleanQuery(true); - bq.add(q1, BooleanClause.Occur.SHOULD); - bq.add(q2, BooleanClause.Occur.SHOULD); - - hits = search.search(bq, null, 1000).scoreDocs; - assertEquals(0, hits[0].doc); - assertEquals(1, hits[1].doc); - assertTrue(hits[0].score > hits[1].score); - } - - - public void testBooleanOrderUnAffected() throws IOException { - // NOTE: uses index build in *this* setUp - - IndexReader reader = IndexReader.open(small); - IndexSearcher search = new IndexSearcher(reader); - - // first do a regular RangeQuery which uses term expansion so - // docs with more terms in range get higher scores - - Query rq = new RangeQuery(new Term("data","1"),new Term("data","4"),T); - - ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; - int numHits = expected.length; - - // now do a boolean where which also contains a - // ConstantScoreRangeQuery and 
make sure hte order is the same - - BooleanQuery q = new BooleanQuery(); - q.add(rq, BooleanClause.Occur.MUST);//T, F); - q.add(csrq("data","1","6", T, T), BooleanClause.Occur.MUST);//T, F); - - ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs; - - assertEquals("wrong numebr of hits", numHits, actual.length); - for (int i = 0; i < numHits; i++) { - assertEquals("mismatch in docid for hit#"+i, - expected[i].doc, actual[i].doc); - } - - } - - - - - - public void testRangeQueryId() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = IndexReader.open(signedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - int medId = ((maxId - minId) / 2); - - String minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - // test id, bounded on both ends - - result = search.search(csrq("id",minIP,maxIP,T,T), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("id",minIP,maxIP,T,F), null, numDocs).scoreDocs; - assertEquals("all but last", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,T), null, numDocs).scoreDocs; - assertEquals("all but first", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,F), null, numDocs).scoreDocs; - assertEquals("all but ends", numDocs-2, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,T), null, numDocs).scoreDocs; - assertEquals("med and up", 1+ maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,T,T), null, numDocs).scoreDocs; - assertEquals("up to med", 1+ medId-minId, result.length); - - // unbounded id - - result = search.search(csrq("id",minIP,null,T,F), null, numDocs).scoreDocs; - assertEquals("min and up", numDocs, result.length); - - result = search.search(csrq("id",null,maxIP,F,T), 
null, numDocs).scoreDocs; - assertEquals("max and down", numDocs, result.length); - - result = search.search(csrq("id",minIP,null,F,F), null, numDocs).scoreDocs; - assertEquals("not min, but up", numDocs-1, result.length); - - result = search.search(csrq("id",null,maxIP,F,F), null, numDocs).scoreDocs; - assertEquals("not max, but down", numDocs-1, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,F), null, numDocs).scoreDocs; - assertEquals("med and up, not max", maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,F,T), null, numDocs).scoreDocs; - assertEquals("not min, up to med", medId-minId, result.length); - - // very small sets - - result = search.search(csrq("id",minIP,minIP,F,F), null, numDocs).scoreDocs; - assertEquals("min,min,F,F", 0, result.length); - result = search.search(csrq("id",medIP,medIP,F,F), null, numDocs).scoreDocs; - assertEquals("med,med,F,F", 0, result.length); - result = search.search(csrq("id",maxIP,maxIP,F,F), null, numDocs).scoreDocs; - assertEquals("max,max,F,F", 0, result.length); - - result = search.search(csrq("id",minIP,minIP,T,T), null, numDocs).scoreDocs; - assertEquals("min,min,T,T", 1, result.length); - result = search.search(csrq("id",null,minIP,F,T), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T", 1, result.length); - - result = search.search(csrq("id",maxIP,maxIP,T,T), null, numDocs).scoreDocs; - assertEquals("max,max,T,T", 1, result.length); - result = search.search(csrq("id",maxIP,null,T,F), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T", 1, result.length); - - result = search.search(csrq("id",medIP,medIP,T,T), null, numDocs).scoreDocs; - assertEquals("med,med,T,T", 1, result.length); - - } - - - public void testRangeQueryIdCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = IndexReader.open(signedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - int medId = ((maxId - minId) / 2); - - String 
minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test id, bounded on both ends - - result = search.search(csrq("id",minIP,maxIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("id",minIP,maxIP,T,F,c), null, numDocs).scoreDocs; - assertEquals("all but last", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("all but first", numDocs-1, result.length); - - result = search.search(csrq("id",minIP,maxIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("all but ends", numDocs-2, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("med and up", 1+ maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("up to med", 1+ medId-minId, result.length); - - // unbounded id - - result = search.search(csrq("id",minIP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("min and up", numDocs, result.length); - - result = search.search(csrq("id",null,maxIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("max and down", numDocs, result.length); - - result = search.search(csrq("id",minIP,null,F,F,c), null, numDocs).scoreDocs; - assertEquals("not min, but up", numDocs-1, result.length); - - result = search.search(csrq("id",null,maxIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("not max, but down", numDocs-1, result.length); - - result = search.search(csrq("id",medIP,maxIP,T,F,c), null, numDocs).scoreDocs; - assertEquals("med and up, not max", maxId-medId, result.length); - - result = search.search(csrq("id",minIP,medIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("not min, up to med", medId-minId, 
result.length); - - // very small sets - - result = search.search(csrq("id",minIP,minIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("id",medIP,medIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("med,med,F,F,c", 0, result.length); - result = search.search(csrq("id",maxIP,maxIP,F,F,c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("id",minIP,minIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("id",null,minIP,F,T,c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("id",maxIP,maxIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("id",maxIP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - - result = search.search(csrq("id",medIP,medIP,T,T,c), null, numDocs).scoreDocs; - assertEquals("med,med,T,T,c", 1, result.length); - } - - - public void testRangeQueryRand() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = IndexReader.open(signedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - String minRP = pad(signedIndex.minR); - String maxRP = pad(signedIndex.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - // test extremes, bounded on both ends - - result = search.search(csrq("rand",minRP,maxRP,T,T), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("rand",minRP,maxRP,T,F), null, numDocs).scoreDocs; - assertEquals("all but biggest", numDocs-1, result.length); - - result = search.search(csrq("rand",minRP,maxRP,F,T), null, numDocs).scoreDocs; - assertEquals("all but smallest", numDocs-1, result.length); - - 
result = search.search(csrq("rand",minRP,maxRP,F,F), null, numDocs).scoreDocs; - assertEquals("all but extremes", numDocs-2, result.length); - - // unbounded - - result = search.search(csrq("rand",minRP,null,T,F), null, numDocs).scoreDocs; - assertEquals("smallest and up", numDocs, result.length); - - result = search.search(csrq("rand",null,maxRP,F,T), null, numDocs).scoreDocs; - assertEquals("biggest and down", numDocs, result.length); - - result = search.search(csrq("rand",minRP,null,F,F), null, numDocs).scoreDocs; - assertEquals("not smallest, but up", numDocs-1, result.length); - - result = search.search(csrq("rand",null,maxRP,F,F), null, numDocs).scoreDocs; - assertEquals("not biggest, but down", numDocs-1, result.length); - - // very small sets - - result = search.search(csrq("rand",minRP,minRP,F,F), null, numDocs).scoreDocs; - assertEquals("min,min,F,F", 0, result.length); - result = search.search(csrq("rand",maxRP,maxRP,F,F), null, numDocs).scoreDocs; - assertEquals("max,max,F,F", 0, result.length); - - result = search.search(csrq("rand",minRP,minRP,T,T), null, numDocs).scoreDocs; - assertEquals("min,min,T,T", 1, result.length); - result = search.search(csrq("rand",null,minRP,F,T), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T", 1, result.length); - - result = search.search(csrq("rand",maxRP,maxRP,T,T), null, numDocs).scoreDocs; - assertEquals("max,max,T,T", 1, result.length); - result = search.search(csrq("rand",maxRP,null,T,F), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T", 1, result.length); - - } - - public void testRangeQueryRandCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - // using the unsigned index because collation seems to ignore hyphens - IndexReader reader = IndexReader.open(unsignedIndex.index); - IndexSearcher search = new IndexSearcher(reader); - - String minRP = pad(unsignedIndex.minR); - String maxRP = pad(unsignedIndex.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of 
docs", numDocs, 1+ maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test extremes, bounded on both ends - - result = search.search(csrq("rand",minRP,maxRP,T,T,c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("rand",minRP,maxRP,T,F,c), null, numDocs).scoreDocs; - assertEquals("all but biggest", numDocs-1, result.length); - - result = search.search(csrq("rand",minRP,maxRP,F,T,c), null, numDocs).scoreDocs; - assertEquals("all but smallest", numDocs-1, result.length); - - result = search.search(csrq("rand",minRP,maxRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("all but extremes", numDocs-2, result.length); - - // unbounded - - result = search.search(csrq("rand",minRP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("smallest and up", numDocs, result.length); - - result = search.search(csrq("rand",null,maxRP,F,T,c), null, numDocs).scoreDocs; - assertEquals("biggest and down", numDocs, result.length); - - result = search.search(csrq("rand",minRP,null,F,F,c), null, numDocs).scoreDocs; - assertEquals("not smallest, but up", numDocs-1, result.length); - - result = search.search(csrq("rand",null,maxRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("not biggest, but down", numDocs-1, result.length); - - // very small sets - - result = search.search(csrq("rand",minRP,minRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("rand",maxRP,maxRP,F,F,c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("rand",minRP,minRP,T,T,c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("rand",null,minRP,F,T,c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("rand",maxRP,maxRP,T,T,c), null, numDocs).scoreDocs; - 
assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("rand",maxRP,null,T,F,c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - } - - public void testFarsi() throws Exception { - - /* build an index */ - RAMDirectory farsiIndex = new RAMDirectory(); - IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, - IndexWriter.MaxFieldLength.LIMITED); - Document doc = new Document(); - doc.add(new Field("content","\u0633\u0627\u0628", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - - writer.optimize(); - writer.close(); - - IndexReader reader = IndexReader.open(farsiIndex); - IndexSearcher search = new IndexSearcher(reader); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator c = Collator.getInstance(new Locale("ar")); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not supported). 
- ScoreDoc[] result = search.search(csrq("content","\u062F", "\u0698", T, T, c), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - search.close(); - } -} Index: src/java/org/apache/lucene/queryParser/Token.java =================================================================== --- src/java/org/apache/lucene/queryParser/Token.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/Token.java (working copy) @@ -121,4 +121,4 @@ } } -/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */ +/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/TokenMgrError.java =================================================================== --- src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy) @@ -137,4 +137,4 @@ this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */ +/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */ Index: src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 712531) +++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FuzzyQuery; import 
org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; @@ -94,7 +93,6 @@ *
* *Note that QueryParser is not thread-safe.
- * */ public class QueryParser implements QueryParserConstants { @@ -117,7 +115,7 @@ private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = true; - boolean useOldRangeQuery= false; + boolean constantScoreRewrite= true; boolean allowLeadingWildcard = false; boolean enablePositionIncrements = false; @@ -134,7 +132,7 @@ Map fieldToDateResolution = null; // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys and ConstantScoreRangeQuerys. + // for use when constructing RangeQuerys. Collator rangeCollator = null; /** The default operator for parsing queries. @@ -324,27 +322,43 @@ } /** - * By default QueryParser uses new ConstantScoreRangeQuery in preference to RangeQuery - * for range queries. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of range terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the old-fashioned RangeQuery and the above - * points are not required then set this option totrue
- * Default is false.
+ * @deprecated Please use {@link #setConstantScoreRewrite} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
- this.useOldRangeQuery = useOldRangeQuery;
+ constantScoreRewrite = !useOldRangeQuery;
}
/**
- * @see #setUseOldRangeQuery(boolean)
+ * @deprecated Please use {@link #getConstantScoreRewrite} instead.
*/
public boolean getUseOldRangeQuery() {
- return useOldRangeQuery;
+ return !constantScoreRewrite;
}
/**
+ * By default QueryParser uses constant-score rewriting
+ * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
+ * a) Runs faster b) Does not have the scarcity of terms unduly influence score
+ * c) avoids any "TooManyBooleanClauses" exception.
+ * However, if your application really needs to use the
+ * old-fashioned BooleanQuery expansion rewriting and the above
+ * points are not relevant then set this option to <code>false</code>.
+ * Default is <code>true</code>.
+ */
+ public void setConstantScoreRewrite(boolean v) {
+ constantScoreRewrite = v;
+ }
+
+
+ /**
+ * @see #setConstantScoreRewrite(boolean)
+ */
+ public boolean getConstantScoreRewrite() {
+ return constantScoreRewrite;
+ }
+
+ /**
* Set locale used by date range parsing.
*/
public void setLocale(Locale locale) {
@@ -415,9 +429,7 @@
/**
* Sets the collator used to determine index term inclusion in ranges
- * specified either for ConstantScoreRangeQuerys or RangeQuerys (if
- * {@link #setUseOldRangeQuery(boolean)} is called with a true
- * value.)
+ * for RangeQuerys.
*
* WARNING: Setting the rangeCollator to a non-null
* collator using this method will cause every single index Term in the
@@ -426,7 +438,6 @@
* be very slow.
*
* @param rc the collator to use when constructing RangeQuerys
- * and ConstantScoreRangeQuerys
*/
public void setRangeCollator(Collator rc) {
rangeCollator = rc;
@@ -434,9 +445,7 @@
/**
* @return the collator used to determine index term inclusion in ranges
- * specified either for ConstantScoreRangeQuerys or RangeQuerys (if
- * {@link #setUseOldRangeQuery(boolean)} is called with a true
- * value.)
+ * for RangeQuerys.
*/
public Collator getRangeCollator() {
return rangeCollator;
@@ -718,7 +727,9 @@
* @return new PrefixQuery instance
*/
protected Query newPrefixQuery(Term prefix){
- return new PrefixQuery(prefix);
+ PrefixQuery query = new PrefixQuery(prefix);
+ query.setConstantScoreRewrite(constantScoreRewrite);
+ return query;
}
/**
@@ -729,6 +740,7 @@
* @return new FuzzyQuery Instance
*/
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
+ // FuzzyQuery doesn't yet allow constant score rewrite
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
}
@@ -741,17 +753,9 @@
* @return new RangeQuery instance
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
- if(useOldRangeQuery)
- {
- return new RangeQuery(new Term(field, part1),
- new Term(field, part2),
- inclusive, rangeCollator);
- }
- else
- {
- return new ConstantScoreRangeQuery
- (field, part1, part2, inclusive, inclusive, rangeCollator);
- }
+ RangeQuery query = new RangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
+ query.setConstantScoreRewrite(constantScoreRewrite);
+ return query;
}
/**
@@ -768,7 +772,9 @@
* @return new WildcardQuery instance
*/
protected Query newWildcardQuery(Term t) {
- return new WildcardQuery(t);
+ WildcardQuery query = new WildcardQuery(t);
+ query.setConstantScoreRewrite(constantScoreRewrite);
+ return query;
}
/**
@@ -1245,7 +1251,6 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
- boolean rangein = false;
Query q;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case STAR:
@@ -1490,12 +1495,6 @@
finally { jj_save(0, xla); }
}
- private boolean jj_3R_3() {
- if (jj_scan_token(STAR)) return true;
- if (jj_scan_token(COLON)) return true;
- return false;
- }
-
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@@ -1512,6 +1511,12 @@
return false;
}
+ private boolean jj_3R_3() {
+ if (jj_scan_token(STAR)) return true;
+ if (jj_scan_token(COLON)) return true;
+ return false;
+ }
+
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 712531)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -45,7 +45,6 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@@ -118,10 +117,6 @@
*
*
* Note that QueryParser is not thread-safe.
- * - * @author Brian Goetz - * @author Peter Halacsy - * @author Tatu Saloranta */ public class QueryParser { @@ -144,7 +139,7 @@ private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = true; - boolean useOldRangeQuery= false; + boolean constantScoreRewrite= true; boolean allowLeadingWildcard = false; boolean enablePositionIncrements = false; @@ -161,7 +156,7 @@ Map fieldToDateResolution = null; // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys and ConstantScoreRangeQuerys. + // for use when constructing RangeQuerys. Collator rangeCollator = null; /** The default operator for parsing queries. @@ -351,27 +346,43 @@ } /** - * By default QueryParser uses new ConstantScoreRangeQuery in preference to RangeQuery - * for range queries. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of range terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the old-fashioned RangeQuery and the above - * points are not required then set this option totrue
- * Default is false.
+ * @deprecated Please use {@link #setConstantScoreRewrite} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
- this.useOldRangeQuery = useOldRangeQuery;
+ constantScoreRewrite = !useOldRangeQuery;
}
/**
- * @see #setUseOldRangeQuery(boolean)
+ * @deprecated Please use {@link #getConstantScoreRewrite} instead.
*/
public boolean getUseOldRangeQuery() {
- return useOldRangeQuery;
+ return !constantScoreRewrite;
}
/**
+ * By default QueryParser uses constant-score rewriting
+ * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
+ * a) Runs faster b) Does not have the scarcity of terms unduly influence score
+ * c) avoids any "TooManyBooleanClauses" exception.
+ * However, if your application really needs to use the
+ * old-fashioned BooleanQuery expansion rewriting and the above
+ * points are not relevant then set this option to <code>false</code>.
+ * Default is <code>true</code>.
+ */
+ public void setConstantScoreRewrite(boolean v) {
+ constantScoreRewrite = v;
+ }
+
+
+ /**
+ * @see #setConstantScoreRewrite(boolean)
+ */
+ public boolean getConstantScoreRewrite() {
+ return constantScoreRewrite;
+ }
+
+ /**
* Set locale used by date range parsing.
*/
public void setLocale(Locale locale) {
@@ -442,9 +453,7 @@
/**
* Sets the collator used to determine index term inclusion in ranges
- * specified either for ConstantScoreRangeQuerys or RangeQuerys (if
- * {@link #setUseOldRangeQuery(boolean)} is called with a true
- * value.)
+ * for RangeQuerys.
*
* WARNING: Setting the rangeCollator to a non-null
* collator using this method will cause every single index Term in the
@@ -453,7 +462,6 @@
* be very slow.
*
* @param rc the collator to use when constructing RangeQuerys
- * and ConstantScoreRangeQuerys
*/
public void setRangeCollator(Collator rc) {
rangeCollator = rc;
@@ -461,9 +469,7 @@
/**
* @return the collator used to determine index term inclusion in ranges
- * specified either for ConstantScoreRangeQuerys or RangeQuerys (if
- * {@link #setUseOldRangeQuery(boolean)} is called with a true
- * value.)
+ * for RangeQuerys.
*/
public Collator getRangeCollator() {
return rangeCollator;
@@ -745,7 +751,9 @@
* @return new PrefixQuery instance
*/
protected Query newPrefixQuery(Term prefix){
- return new PrefixQuery(prefix);
+ PrefixQuery query = new PrefixQuery(prefix);
+ query.setConstantScoreRewrite(constantScoreRewrite);
+ return query;
}
/**
@@ -756,6 +764,7 @@
* @return new FuzzyQuery Instance
*/
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
+ // FuzzyQuery doesn't yet allow constant score rewrite
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
}
@@ -768,17 +777,9 @@
* @return new RangeQuery instance
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
- if(useOldRangeQuery)
- {
- return new RangeQuery(new Term(field, part1),
- new Term(field, part2),
- inclusive, rangeCollator);
- }
- else
- {
- return new ConstantScoreRangeQuery
- (field, part1, part2, inclusive, inclusive, rangeCollator);
- }
+ RangeQuery query = new RangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
+ query.setConstantScoreRewrite(constantScoreRewrite);
+ return query;
}
/**
@@ -795,7 +796,9 @@
* @return new WildcardQuery instance
*/
protected Query newWildcardQuery(Term t) {
- return new WildcardQuery(t);
+ WildcardQuery query = new WildcardQuery(t);
+ query.setConstantScoreRewrite(constantScoreRewrite);
+ return query;
}
/**
@@ -1247,7 +1250,6 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
- boolean rangein = false;
Query q;
}
{
Index: src/java/org/apache/lucene/queryParser/CharStream.java
===================================================================
--- src/java/org/apache/lucene/queryParser/CharStream.java (revision 712531)
+++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy)
@@ -109,4 +109,4 @@
void Done();
}
-/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
+/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/ParseException.java
===================================================================
--- src/java/org/apache/lucene/queryParser/ParseException.java (revision 712531)
+++ src/java/org/apache/lucene/queryParser/ParseException.java (working copy)
@@ -195,4 +195,4 @@
}
}
-/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
+/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 712531)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -19,7 +19,6 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
Index: src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 712531)
+++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -18,81 +18,197 @@
*/
import java.io.IOException;
+import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.ToStringUtils;
/**
* A {@link Query} that matches documents containing a subset of terms provided
* by a {@link FilteredTermEnum} enumeration.
*
- * MultiTermQuery is not designed to be used by itself.
- *
+ * MultiTermQuery is not designed to be used by itself.
* The reason being that it is not intialized with a {@link FilteredTermEnum}
* enumeration. A {@link FilteredTermEnum} enumeration needs to be provided.
*
* For example, {@link WildcardQuery} and {@link FuzzyQuery} extend
* MultiTermQuery to provide {@link WildcardTermEnum} and
* {@link FuzzyTermEnum}, respectively.
+ *
+ * The pattern Term may be null. A query that uses a null pattern Term should
+ * override equals and hashCode.
*/
public abstract class MultiTermQuery extends Query {
- private Term term;
+ protected Term term;
+ protected boolean constantScoreRewrite = false;
- /** Constructs a query for terms matching term. */
- public MultiTermQuery(Term term) {
- this.term = term;
- }
+ /** Constructs a query for terms matching term. */
+ public MultiTermQuery(Term term) {
+ this.term = term;
+ }
- /** Returns the pattern term. */
- public Term getTerm() { return term; }
+ /**
+ * Constructs a query matching terms that cannot be represented with a single
+ * Term.
+ */
+ public MultiTermQuery() {
+ }
- /** Construct the enumeration to be used, expanding the pattern term. */
- protected abstract FilteredTermEnum getEnum(IndexReader reader)
+ /** Returns the pattern term. */
+ public Term getTerm() {
+ return term;
+ }
+
+ /** Construct the enumeration to be used, expanding the pattern term. */
+ protected abstract FilteredTermEnum getEnum(IndexReader reader)
throws IOException;
- public Query rewrite(IndexReader reader) throws IOException {
+ protected Filter getFilter() {
+ return new MultiTermFilter(this);
+ }
+
+ public Query rewrite(IndexReader reader) throws IOException {
+ if (!constantScoreRewrite) {
FilteredTermEnum enumerator = getEnum(reader);
BooleanQuery query = new BooleanQuery(true);
try {
do {
Term t = enumerator.term();
if (t != null) {
- TermQuery tq = new TermQuery(t); // found a match
+ TermQuery tq = new TermQuery(t); // found a match
tq.setBoost(getBoost() * enumerator.difference()); // set the boost
- query.add(tq, BooleanClause.Occur.SHOULD); // add to query
+ query.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
} while (enumerator.next());
} finally {
enumerator.close();
}
return query;
+ } else {
+ Query query = new ConstantScoreQuery(getFilter());
+ query.setBoost(getBoost());
+ return query;
}
+ }
- /** Prints a user-readable version of this query. */
- public String toString(String field) {
- StringBuffer buffer = new StringBuffer();
- if (!term.field().equals(field)) {
- buffer.append(term.field());
- buffer.append(":");
+ /** Prints a user-readable version of this query. */
+ public String toString(String field) {
+ StringBuffer buffer = new StringBuffer();
+ if (term != null) {
+ if (!term.field().equals(field)) {
+ buffer.append(term.field());
+ buffer.append(":");
+ }
+ buffer.append(term.text());
+ } else {
+ buffer.append("termPattern:unknown");
+ }
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.toString();
+ }
+
+ public boolean getConstantScoreRewrite() {
+ return constantScoreRewrite;
+ }
+
+ public void setConstantScoreRewrite(boolean constantScoreRewrite) {
+ this.constantScoreRewrite = constantScoreRewrite;
+ }
+
+ public boolean equals(Object o) {
+ if (term == null) { // pattern-less subclasses must supply their own equals/hashCode
+ throw new UnsupportedOperationException(
+ "MultiTermQuerys that do not use a pattern term need to override equals/hashcode");
+ }
+ // equals(null) must answer false rather than throw; the instanceof test below rejects null.
+ if (this == o)
+ return true;
+ if (!(o instanceof MultiTermQuery))
+ return false;
+
+ final MultiTermQuery multiTermQuery = (MultiTermQuery) o;
+
+ if (!term.equals(multiTermQuery.term))
+ return false;
+
+ return getBoost() == multiTermQuery.getBoost();
+ }
+
+ public int hashCode() {
+ if (term == null) {
+ throw new UnsupportedOperationException(
+ "MultiTermQuerys that do not use a pattern term need to override equals/hashcode");
+ }
+ return term.hashCode() + Float.floatToRawIntBits(getBoost());
+ }
+
+ static class MultiTermFilter extends Filter {
+ MultiTermQuery mtq;
+
+ abstract class TermGenerator {
+ public void generate(IndexReader reader) throws IOException {
+ TermEnum enumerator = mtq.getEnum(reader);
+ TermDocs termDocs = reader.termDocs();
+ try {
+ do {
+ Term term = enumerator.term();
+ if (term == null)
+ break;
+ termDocs.seek(term);
+ while (termDocs.next()) {
+ handleDoc(termDocs.doc());
+ }
+ } while (enumerator.next());
+ } finally {
+ termDocs.close();
+ enumerator.close();
}
- buffer.append(term.text());
- buffer.append(ToStringUtils.boost(getBoost()));
- return buffer.toString();
+ }
+ abstract public void handleDoc(int doc);
}
+
+ public MultiTermFilter(MultiTermQuery mtq) {
+ this.mtq = mtq;
+ }
+ public BitSet bits(IndexReader reader) throws IOException {
+ final BitSet bitSet = new BitSet(reader.maxDoc());
+ new TermGenerator() {
+ public void handleDoc(int doc) {
+ bitSet.set(doc);
+ }
+ }.generate(reader);
+ return bitSet;
+ }
+
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
+ new TermGenerator() {
+ public void handleDoc(int doc) {
+ bitSet.set(doc);
+ }
+ }.generate(reader);
+
+ return bitSet;
+ }
+
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof MultiTermQuery)) return false;
- final MultiTermQuery multiTermQuery = (MultiTermQuery) o;
+ if (this == o)
+ return true;
+ if (!(o instanceof MultiTermFilter))
+ return false;
- if (!term.equals(multiTermQuery.term)) return false;
-
- return getBoost() == multiTermQuery.getBoost();
+ final MultiTermFilter filter = (MultiTermFilter) o;
+ return mtq.equals(filter.mtq);
}
-
+
public int hashCode() {
- return term.hashCode() + Float.floatToRawIntBits(getBoost());
+ return mtq.hashCode();
}
+ }
}
Index: src/java/org/apache/lucene/search/PrefixTermEnum.java
===================================================================
--- src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 0)
+++ src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 0)
@@ -0,0 +1,42 @@
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * Subclass of FilteredTermEnum for enumerating all terms that match the
+ * specified prefix filter term.
+ *
+ * Term enumerations are always ordered by Term.compareTo(). Each term in
+ * the enumeration is greater than all that precede it.
+ *
+ */
+public class PrefixTermEnum extends FilteredTermEnum {
+
+ private Term prefix;
+ private boolean endEnum = false;
+
+ public PrefixTermEnum(IndexReader reader, Term prefix) throws IOException {
+ this.prefix = prefix;
+
+ setEnum(reader.terms(new Term(prefix.field(), prefix.text())));
+ }
+
+ public float difference() {
+ return 1.0f;
+ }
+
+ protected boolean endEnum() {
+ return endEnum;
+ }
+
+ protected boolean termCompare(Term term) {
+ if (term.field() == prefix.field() && term.text().startsWith(prefix.text())) {
+ return true;
+ }
+ endEnum = true;
+ return false;
+ }
+}
Property changes on: src/java/org/apache/lucene/search/PrefixTermEnum.java
___________________________________________________________________
Name: svn:eol-style
+ native
Index: src/java/org/apache/lucene/search/FuzzyQuery.java
===================================================================
--- src/java/org/apache/lucene/search/FuzzyQuery.java (revision 712531)
+++ src/java/org/apache/lucene/search/FuzzyQuery.java (working copy)
@@ -99,6 +99,10 @@
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
}
+
+ public void setConstantScoreRewrite(boolean constantScoreRewrite) {
+ throw new UnsupportedOperationException("FuzzyQuery cannot rewrite to a constant score query");
+ }
public Query rewrite(IndexReader reader) throws IOException {
FilteredTermEnum enumerator = getEnum(reader);
Index: src/java/org/apache/lucene/search/RangeFilter.java
===================================================================
--- src/java/org/apache/lucene/search/RangeFilter.java (revision 712531)
+++ src/java/org/apache/lucene/search/RangeFilter.java (working copy)
@@ -18,10 +18,6 @@
*/
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
import java.util.BitSet;
@@ -44,6 +40,7 @@
private boolean includeLower;
private boolean includeUpper;
private Collator collator;
+ private RangeQuery rangeQuery;
/**
* @param fieldName The field this range applies to
@@ -75,6 +72,7 @@
throw new IllegalArgumentException
("The upper bound must be non-null to be inclusive");
}
+ initRangeQuery();
}
/**
@@ -99,8 +97,13 @@
Collator collator) {
this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
this.collator = collator;
+ initRangeQuery();
}
+ private void initRangeQuery() {
+ rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ }
+
/**
* Constructs a filter for field fieldName matching
* less than or equal to upperTerm.
@@ -124,81 +127,7 @@
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
public BitSet bits(IndexReader reader) throws IOException {
- BitSet bits = new BitSet(reader.maxDoc());
- TermEnum enumerator =
- (null != lowerTerm && collator == null
- ? reader.terms(new Term(fieldName, lowerTerm))
- : reader.terms(new Term(fieldName)));
-
- try {
-
- if (enumerator.term() == null) {
- return bits;
- }
-
- TermDocs termDocs = reader.termDocs();
- try {
- if (collator != null) {
- do {
- Term term = enumerator.term();
- if (term != null && term.field().equals(fieldName)) {
- if ((lowerTerm == null
- || (includeLower
- ? collator.compare(term.text(), lowerTerm) >= 0
- : collator.compare(term.text(), lowerTerm) > 0))
- && (upperTerm == null
- || (includeUpper
- ? collator.compare(term.text(), upperTerm) <= 0
- : collator.compare(term.text(), upperTerm) < 0))) {
- /* we have a good term, find the docs */
- termDocs.seek(enumerator.term());
- while (termDocs.next()) {
- bits.set(termDocs.doc());
- }
- }
- }
- }
- while (enumerator.next());
- } else { // collator is null - use Unicode code point ordering
- boolean checkLower = false;
- if (!includeLower) // make adjustments to set to exclusive
- checkLower = true;
-
- do {
- Term term = enumerator.term();
- if (term != null && term.field().equals(fieldName)) {
- if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
- checkLower = false;
- if (upperTerm != null) {
- int compare = upperTerm.compareTo(term.text());
- /* if beyond the upper term, or is exclusive and
- * this is equal to the upper term, break out */
- if ((compare < 0) ||
- (!includeUpper && compare==0)) {
- break;
- }
- }
- /* we have a good term, find the docs */
-
- termDocs.seek(enumerator.term());
- while (termDocs.next()) {
- bits.set(termDocs.doc());
- }
- }
- } else {
- break;
- }
- }
- while (enumerator.next());
- }
- } finally {
- termDocs.close();
- }
- } finally {
- enumerator.close();
- }
-
- return bits;
+ return rangeQuery.getFilter().bits(reader);
}
/**
@@ -206,84 +135,7 @@
* permitted in search results.
*/
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- OpenBitSet bits = new OpenBitSet(reader.maxDoc());
-
- TermEnum enumerator =
- (null != lowerTerm && collator == null
- ? reader.terms(new Term(fieldName, lowerTerm))
- : reader.terms(new Term(fieldName)));
-
- try {
-
- if (enumerator.term() == null) {
- return bits;
- }
-
- TermDocs termDocs = reader.termDocs();
-
- try {
- if (collator != null) {
- do {
- Term term = enumerator.term();
- if (term != null && term.field().equals(fieldName)) {
- if ((lowerTerm == null
- || (includeLower
- ? collator.compare(term.text(), lowerTerm) >= 0
- : collator.compare(term.text(), lowerTerm) > 0))
- && (upperTerm == null
- || (includeUpper
- ? collator.compare(term.text(), upperTerm) <= 0
- : collator.compare(term.text(), upperTerm) < 0))) {
- /* we have a good term, find the docs */
- termDocs.seek(enumerator.term());
- while (termDocs.next()) {
- bits.set(termDocs.doc());
- }
- }
- }
- }
- while (enumerator.next());
- } else { // collator is null - use Unicode code point ordering
- boolean checkLower = false;
- if (!includeLower) // make adjustments to set to exclusive
- checkLower = true;
-
- do {
- Term term = enumerator.term();
- if (term != null && term.field().equals(fieldName)) {
- if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
- checkLower = false;
- if (upperTerm != null) {
- int compare = upperTerm.compareTo(term.text());
- /* if beyond the upper term, or is exclusive and
- * this is equal to the upper term, break out */
- if ((compare < 0) ||
- (!includeUpper && compare==0)) {
- break;
- }
- }
- /* we have a good term, find the docs */
-
- termDocs.seek(enumerator.term());
- while (termDocs.next()) {
- bits.set(termDocs.doc());
- }
- }
- } else {
- break;
- }
- }
- while (enumerator.next());
- }
-
- } finally {
- termDocs.close();
- }
- } finally {
- enumerator.close();
- }
-
- return bits;
+ return rangeQuery.getFilter().getDocIdSet(reader);
}
public String toString() {
Index: src/java/org/apache/lucene/search/RangeTermEnum.java
===================================================================
--- src/java/org/apache/lucene/search/RangeTermEnum.java (revision 0)
+++ src/java/org/apache/lucene/search/RangeTermEnum.java (revision 0)
@@ -0,0 +1,123 @@
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
+/**
+ * Subclass of FilteredTermEnum for enumerating all terms that match the
+ * specified range parameters.
+ *
+ * Term enumerations are always ordered by Term.compareTo(). Each term in
+ * the enumeration is greater than all that precede it.
+ */
+public class RangeTermEnum extends FilteredTermEnum {
+
+ private Collator collator = null;
+ private boolean endEnum = false;
+ private String field;
+ private String upperTermText;
+ private String lowerTermText;
+ private boolean includeLower;
+ private boolean includeUpper;
+
+ /** Enumerates the terms of field that fall inside the given range.
+ * @param reader The index reader whose terms are enumerated.
+ * @param collator
+ * The collator to use to collate index Terms, to determine their
+ * membership in the range bounded by lowerTerm and
+ * upperTerm. May be null, in which case plain String ordering is used.
+ * @param field
+ * An interned field that holds both lower and upper terms.
+ * @param lowerTermText
+ * The term text at the lower end of the range, or null for no lower bound
+ * @param upperTermText
+ * The term text at the upper end of the range, or null for no upper bound
+ * @param includeLower
+ * If true, the lowerTerm is included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is included in the range.
+ *
+ * @throws IOException If reading terms from the index fails.
+ */
+ public RangeTermEnum(IndexReader reader, Collator collator, String field,
+ String lowerTermText, String upperTermText, boolean includeLower, boolean includeUpper) throws IOException {
+ this.collator = collator;
+ this.upperTermText = upperTermText;
+ this.lowerTermText = lowerTermText;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.field = field;
+
+ // do a little bit of normalization...
+ // open ended range queries should always be inclusive.
+ if (this.lowerTermText == null) {
+ this.lowerTermText = "";
+ this.includeLower = true;
+ }
+
+ if (this.upperTermText == null) {
+ this.includeUpper = true;
+ }
+ // With a collator, index order is not collation order, so every term of the field must be examined.
+ setEnum(reader.terms(new Term(this.field, this.collator == null ? this.lowerTermText : "")));
+ }
+
+ public float difference() {
+ return 1.0f;
+ }
+
+ protected boolean endEnum() {
+ return endEnum;
+ }
+
+ protected boolean termCompare(Term term) {
+ if (collator == null) {
+ // Use Unicode code point ordering
+ boolean checkLower = false;
+ if (!includeLower) // make adjustments to set to exclusive
+ checkLower = true;
+ if (term != null && term.field() == field) { // interned comparison
+ if (!checkLower || null==lowerTermText || term.text().compareTo(lowerTermText) > 0) {
+ checkLower = false;
+ if (upperTermText != null) {
+ int compare = upperTermText.compareTo(term.text());
+ /*
+ * if beyond the upper term, or is exclusive and this is equal to
+ * the upper term, break out
+ */
+ if ((compare < 0) ||
+ (!includeUpper && compare==0)) {
+ endEnum = true;
+ return false;
+ }
+ }
+ return true;
+ }
+ } else {
+ // break
+ endEnum = true;
+ return false;
+ }
+ return false;
+ } else {
+ if (term != null && term.field() == field) { // interned comparison
+ if ((lowerTermText == null
+ || (includeLower
+ ? collator.compare(term.text(), lowerTermText) >= 0
+ : collator.compare(term.text(), lowerTermText) > 0))
+ && (upperTermText == null
+ || (includeUpper
+ ? collator.compare(term.text(), upperTermText) <= 0
+ : collator.compare(term.text(), upperTermText) < 0))) {
+ return true;
+ }
+ return false;
+ }
+ endEnum = true;
+ return false;
+ }
+ }
+}
Property changes on: src/java/org/apache/lucene/search/RangeTermEnum.java
___________________________________________________________________
Name: svn:eol-style
+ native
Index: src/java/org/apache/lucene/search/RangeQuery.java
===================================================================
--- src/java/org/apache/lucene/search/RangeQuery.java (revision 712531)
+++ src/java/org/apache/lucene/search/RangeQuery.java (working copy)
@@ -21,234 +21,205 @@
import java.text.Collator;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.util.ToStringUtils;
/**
* A Query that matches documents within an exclusive range. A RangeQuery
* is built by QueryParser for input like [010 TO 120] but only if the QueryParser has
* the useOldRangeQuery property set to true. The QueryParser default behaviour is to use
- * the newer ConstantScoreRangeQuery class. This is generally preferable because:
+ * the newer ConstantScore mode. This is generally preferable because:
*
lowerTerm but less than upperTerm.
- * There must be at least one term and either term may be null,
- * in which case there is no bound on that side, but if there are
- * two terms, both terms must be for the same field.
- *
- * @param lowerTerm The Term at the lower end of the range
- * @param upperTerm The Term at the upper end of the range
- * @param inclusive If true, both lowerTerm and
- * upperTerm will themselves be included in the range.
- */
- public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive)
- {
- if (lowerTerm == null && upperTerm == null)
- {
- throw new IllegalArgumentException("At least one term must be non-null");
- }
- if (lowerTerm != null && upperTerm != null && lowerTerm.field() != upperTerm.field())
- {
- throw new IllegalArgumentException("Both terms must be for the same field");
- }
- // if we have a lowerTerm, start there. otherwise, start at beginning
- if (lowerTerm != null) {
- this.lowerTerm = lowerTerm;
- }
- else {
- this.lowerTerm = new Term(upperTerm.field());
- }
+ /**
+ * Constructs a query selecting all terms greater than lowerTerm
+ * but less than upperTerm. There must be at least one term and
+ * either term may be null, in which case there is no bound on that side.
+ *
+ * @param field The field that holds both lower and upper terms.
+ * @param lowerTerm
+ * The term text at the lower end of the range
+ * @param upperTerm
+ * The term text at the upper end of the range
+ * @param includeLower
+ * If true, the lowerTerm is
+ * included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is
+ * included in the range.
+ */
+ public RangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+ init(new Term(field, lowerTerm), new Term(field, upperTerm), includeLower, includeUpper, null);
+ }
- this.upperTerm = upperTerm;
- this.inclusive = inclusive;
- }
+ /** Constructs a query selecting all terms greater than
+ * lowerTerm but less than upperTerm.
+ * There must be at least one term and either term may be null,
+ * in which case there is no bound on that side.
+ *
+ * If collator is not null, it will be used to decide whether
+ * index terms are within the given range, rather than using the Unicode code
+ * point order in which index terms are stored.
+ *
+ * WARNING: Using this constructor and supplying a non-null
+ * value in the collator parameter will cause every single
+ * index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ * examined. Depending on the number of index Terms in this Field, the
+ * operation could be very slow.
+ * @param field The field that holds both lower and upper terms.
+ * @param lowerTerm The Term text at the lower end of the range
+ * @param upperTerm The Term text at the upper end of the range
+ * @param includeLower
+ * If true, the lowerTerm is
+ * included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is
+ * included in the range.
+ * @param collator The collator to use to collate index Terms, to determine
+ * their membership in the range bounded by lowerTerm and
+ * upperTerm.
+ */
+ public RangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
+ Collator collator) {
+ init(new Term(field, lowerTerm), new Term(field,upperTerm), includeLower, includeUpper, collator);
+ }
- /** Constructs a query selecting all terms greater than
- * lowerTerm but less than upperTerm.
- * There must be at least one term and either term may be null,
- * in which case there is no bound on that side, but if there are
- * two terms, both terms must be for the same field.
- *
- * If collator is not null, it will be used to decide whether
- * index terms are within the given range, rather than using the Unicode code
- * point order in which index terms are stored.
- *
- * WARNING: Using this constructor and supplying a non-null
- * value in the collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The Term at the lower end of the range
- * @param upperTerm The Term at the upper end of the range
- * @param inclusive If true, both lowerTerm and
- * upperTerm will themselves be included in the range.
- * @param collator The collator to use to collate index Terms, to determine
- * their membership in the range bounded by lowerTerm and
- * upperTerm.
- */
- public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
- Collator collator)
- {
- this(lowerTerm, upperTerm, inclusive);
- this.collator = collator;
- }
+ /** @deprecated Please use {@link #RangeQuery(String,
+ * String, String, boolean, boolean, Collator)} instead */
+ public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
+ Collator collator) {
+ init(lowerTerm, upperTerm, inclusive, inclusive, collator);
+ }
+
+ /** @deprecated Please use {@link #RangeQuery(String,
+ * String, String, boolean, boolean)} instead */
+ public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive) {
+ init(lowerTerm, upperTerm, inclusive, inclusive, null);
+ }
- public Query rewrite(IndexReader reader) throws IOException {
+ private void init(Term lowerTerm, Term upperTerm, boolean includeLower, boolean includeUpper, Collator collator) {
+ if (lowerTerm == null && upperTerm == null)
+ throw new IllegalArgumentException("At least one term must be non-null");
+ if (lowerTerm != null && upperTerm != null && lowerTerm.field() != upperTerm.field())
+ throw new IllegalArgumentException("Both terms must be for the same field");
- BooleanQuery query = new BooleanQuery(true);
- String testField = getField();
- if (collator != null) {
- TermEnum enumerator = reader.terms(new Term(testField, ""));
- String lowerTermText = lowerTerm != null ? lowerTerm.text() : null;
- String upperTermText = upperTerm != null ? upperTerm.text() : null;
+ if (lowerTerm == null)
+ this.field = upperTerm.field();
+ else
+ this.field = lowerTerm.field();
+ this.lowerTerm = lowerTerm;
+ this.upperTerm = upperTerm;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.collator = collator;
+ }
+
+ /** Returns the field name for this query */
+ public String getField() {
+ return field;
+ }
- try {
- do {
- Term term = enumerator.term();
- if (term != null && term.field() == testField) { // interned comparison
- if ((lowerTermText == null
- || (inclusive ? collator.compare(term.text(), lowerTermText) >= 0
- : collator.compare(term.text(), lowerTermText) > 0))
- && (upperTermText == null
- || (inclusive ? collator.compare(term.text(), upperTermText) <= 0
- : collator.compare(term.text(), upperTermText) < 0))) {
- addTermToQuery(term, query);
- }
- }
- }
- while (enumerator.next());
- }
- finally {
- enumerator.close();
- }
- }
- else { // collator is null
- TermEnum enumerator = reader.terms(lowerTerm);
+ /** Returns the lower term of this range query.
+ * @deprecated Use {@link #getLowerTermText} instead. */
+ public Term getLowerTerm() { return lowerTerm; }
- try {
+ /** Returns the upper term of this range query.
+ * @deprecated Use {@link #getUpperTermText} instead. */
+ public Term getUpperTerm() { return upperTerm; }
+
+ /** Returns the lower value of this range query */
+ public String getLowerTermText() { return lowerTerm == null ? null : lowerTerm.text(); }
- boolean checkLower = false;
- if (!inclusive) // make adjustments to set to exclusive
- checkLower = true;
+ /** Returns the upper value of this range query */
+ public String getUpperTermText() { return upperTerm == null ? null : upperTerm.text(); }
+
+ /** Returns true if the lower endpoint is inclusive */
+ public boolean includesLower() { return includeLower; }
+
+ /** Returns true if the upper endpoint is inclusive */
+ public boolean includesUpper() { return includeUpper; }
- do {
- Term term = enumerator.term();
- if (term != null && term.field() == testField) { // interned comparison
- if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
- checkLower = false;
- if (upperTerm != null) {
- int compare = upperTerm.text().compareTo(term.text());
- /* if beyond the upper term, or is exclusive and
- * this is equal to the upper term, break out */
- if ((compare < 0) || (!inclusive && compare == 0))
- break;
- }
- addTermToQuery(term, query); // Found a match
- }
- }
- else {
- break;
- }
- }
- while (enumerator.next());
- }
- finally {
- enumerator.close();
- }
- }
- return query;
- }
+ /** Returns true if the range query is inclusive
+ * @deprecated Use {@link #includesLower}, {@link #includesUpper} instead.
+ */
+ public boolean isInclusive() { return includeUpper && includeLower; }
- private void addTermToQuery(Term term, BooleanQuery query) {
- TermQuery tq = new TermQuery(term);
- tq.setBoost(getBoost()); // set the boost
- query.add(tq, BooleanClause.Occur.SHOULD); // add to query
- }
+ /** Returns the collator used to determine range inclusion, if any. */
+ public Collator getCollator() { return collator; }
+
+ protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ // Use the null-safe text accessors: init() permits one of the two bound Terms to be null.
+ return new RangeTermEnum(reader, collator, getField(), getLowerTermText(), getUpperTermText(), includeLower, includeUpper);
+ }
- /** Returns the field name for this query */
- public String getField() {
- return (lowerTerm != null ? lowerTerm.field() : upperTerm.field());
- }
+ /** Prints a user-readable version of this query; an unbounded end is printed as "*". */
+ public String toString(String field) {
+ StringBuffer buffer = new StringBuffer();
+ if (!getField().equals(field)) {
+ buffer.append(getField());
+ buffer.append(":");
+ }
+ buffer.append(includeLower ? '[' : '{');
+ buffer.append(getLowerTermText() != null ? getLowerTermText() : "*"); // bound Term may be null or hold null text
+ buffer.append(" TO ");
+ buffer.append(getUpperTermText() != null ? getUpperTermText() : "*"); // same null handling as the lower bound
+ buffer.append(includeUpper ? ']' : '}');
+ if (getBoost() != 1.0f) {
+ buffer.append("^");
+ buffer.append(Float.toString(getBoost()));
+ }
+ return buffer.toString();
+ }
- /** Returns the lower term of this range query */
- public Term getLowerTerm() { return lowerTerm; }
+ /** Returns true iff o is a RangeQuery with the same field, bounds, inclusiveness, collator and boost. */
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof RangeQuery)) return false;
+ RangeQuery other = (RangeQuery) o;
- /** Returns the upper term of this range query */
- public Term getUpperTerm() { return upperTerm; }
+ if (this.field != other.field // interned comparison
+ || this.includeLower != other.includeLower
+ || this.includeUpper != other.includeUpper
+ || (this.collator != null ? !this.collator.equals(other.collator) : other.collator != null) // symmetric
+ ) { return false; }
+ String lowerVal = this.lowerTerm == null ? null : lowerTerm.text();
+ String upperVal = this.upperTerm == null ? null : upperTerm.text();
+ String olowerText = other.lowerTerm == null ? null : other.lowerTerm.text();
+ String oupperText = other.upperTerm == null ? null : other.upperTerm.text();
+ if (lowerVal != null ? !lowerVal.equals(olowerText) : olowerText != null) return false;
+ if (upperVal != null ? !upperVal.equals(oupperText) : oupperText != null) return false;
+ return this.getBoost() == other.getBoost();
+ }
- /** Returns true if the range query is inclusive */
- public boolean isInclusive() { return inclusive; }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return collator; }
-
-
- /** Prints a user-readable version of this query. */
- public String toString(String field)
- {
- StringBuffer buffer = new StringBuffer();
- if (!getField().equals(field))
- {
- buffer.append(getField());
- buffer.append(":");
- }
- buffer.append(inclusive ? "[" : "{");
- buffer.append(lowerTerm != null ? lowerTerm.text() : "null");
- buffer.append(" TO ");
- buffer.append(upperTerm != null ? upperTerm.text() : "null");
- buffer.append(inclusive ? "]" : "}");
- buffer.append(ToStringUtils.boost(getBoost()));
- return buffer.toString();
- }
-
- /** Returns true iff o is equal to this. */
- public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof RangeQuery)) return false;
-
- final RangeQuery other = (RangeQuery) o;
- if (this.getBoost() != other.getBoost()) return false;
- if (this.inclusive != other.inclusive) return false;
- if (this.collator != null && ! this.collator.equals(other.collator))
- return false;
-
- // one of lowerTerm and upperTerm can be null
- if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
- if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
- return true;
- }
-
- /** Returns a hash code value for this object.*/
- public int hashCode() {
- int h = Float.floatToIntBits(getBoost());
- h ^= lowerTerm != null ? lowerTerm.hashCode() : 0;
- // reversible mix to make lower and upper position dependent and
- // to prevent them from cancelling out.
- h ^= (h << 25) | (h >>> 8);
- h ^= upperTerm != null ? upperTerm.hashCode() : 0;
- h ^= this.inclusive ? 0x2742E74A : 0;
- h ^= collator != null ? collator.hashCode() : 0;
- return h;
- }
+ /** Returns a hash code value for this object.*/
+ public int hashCode() {
+ int h = Float.floatToIntBits(getBoost()) ^ field.hashCode();
+ String lowerVal = this.lowerTerm == null ? null : lowerTerm.text();
+ String upperVal = this.upperTerm == null ? null : upperTerm.text();
+ // hashCode of "" is 0, so don't use that for null...
+ h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a;
+ // don't just XOR upperVal with out mixing either it or h, as it will cancel
+ // out lowerVal if they are equal.
+ h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix
+ h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
+ h ^= (includeLower ? 0x665599aa : 0)
+ ^ (includeUpper ? 0x99aa5566 : 0);
+ h ^= collator != null ? collator.hashCode() : 0;
+ return h;
+ }
}
Index: src/java/org/apache/lucene/search/PrefixQuery.java
===================================================================
--- src/java/org/apache/lucene/search/PrefixQuery.java (revision 712531)
+++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy)
@@ -19,48 +19,33 @@
import java.io.IOException;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
* is built by QueryParser for input like app*. */
-public class PrefixQuery extends Query {
+public class PrefixQuery extends MultiTermQuery {
private Term prefix;
/** Constructs a query for terms starting with prefix. */
public PrefixQuery(Term prefix) {
+ // The same Term is handed to the MultiTermQuery constructor and is also kept
+ // in this.prefix, which getPrefix() returns.
+ super(prefix);
this.prefix = prefix;
}
/** Returns the prefix of this query. */
public Term getPrefix() { return prefix; }
+
+ /**
+ * Returns a new {@link PrefixTermEnum} created from the given reader and the
+ * term returned by {@link #getTerm()}.
+ *
+ * @param reader passed through to the PrefixTermEnum constructor
+ * @throws IOException declared here; presumably raised while reading terms (TODO confirm)
+ */
+ protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+ return new PrefixTermEnum(reader, getTerm());
+ }
- public Query rewrite(IndexReader reader) throws IOException {
- BooleanQuery query = new BooleanQuery(true);
- TermEnum enumerator = reader.terms(prefix);
- try {
- String prefixText = prefix.text();
- String prefixField = prefix.field();
- do {
- Term term = enumerator.term();
- if (term != null &&
- term.text().startsWith(prefixText) &&
- term.field() == prefixField) // interned comparison
- {
- TermQuery tq = new TermQuery(term); // found a match
- tq.setBoost(getBoost()); // set the boost
- query.add(tq, BooleanClause.Occur.SHOULD); // add to query
- //System.out.println("added " + term);
- } else {
- break;
- }
- } while (enumerator.next());
- } finally {
- enumerator.close();
- }
- return query;
+ /** Returns true only for another PrefixQuery that the superclass comparison also accepts. */
+ public boolean equals(Object o) {
+   // Reject every other query type first, then defer to the
+   // superclass equals for the actual comparison.
+   return (o instanceof PrefixQuery) && super.equals(o);
}
/** Prints a user-readable version of this query. */
@@ -75,18 +60,4 @@
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
-
- /** Returns true iff o is equal to this. */
- public boolean equals(Object o) {
- if (!(o instanceof PrefixQuery))
- return false;
- PrefixQuery other = (PrefixQuery)o;
- return (this.getBoost() == other.getBoost())
- && this.prefix.equals(other.prefix);
- }
-
- /** Returns a hash code value for this object.*/
- public int hashCode() {
- return Float.floatToIntBits(getBoost()) ^ prefix.hashCode() ^ 0x6634D93C;
- }
}
Index: src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java
===================================================================
--- src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (revision 712531)
+++ src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java (working copy)
@@ -17,9 +17,6 @@
* limitations under the License.
*/
-import org.apache.lucene.index.IndexReader;
-
-import java.io.IOException;
import java.text.Collator;
/**
@@ -32,121 +29,31 @@
* Either or both endpoints may be open. Open endpoints may not be exclusive
* (you can't select all but the first or last term without explicitly specifying the term to exclude.)
*
- *
+ * @deprecated Use {@link RangeQuery} and call
+ * {@link RangeQuery#setConstantScoreRewrite setConstantScoreRewrite(true)} instead.
* @version $Id$
*/
-
-public class ConstantScoreRangeQuery extends Query
+public class ConstantScoreRangeQuery extends RangeQuery
{
- private final String fieldName;
- private final String lowerVal;
- private final String upperVal;
- private final boolean includeLower;
- private final boolean includeUpper;
- private Collator collator;
-
+ /**
+ * Creates a range query with the constantScoreRewrite flag switched on.
+ * All arguments are forwarded unchanged to the {@link RangeQuery} constructor.
+ *
+ * @param fieldName the field name for this query
+ * @param lowerVal the value of the lower endpoint of the range
+ * @param upperVal the value of the upper endpoint of the range
+ * @param includeLower whether the lower endpoint is inclusive
+ * @param includeUpper whether the upper endpoint is inclusive
+ */
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
{
- // do a little bit of normalization...
- // open ended range queries should always be inclusive.
- if (lowerVal==null) {
- includeLower=true;
- } else if (includeLower && lowerVal.equals("")) {
- lowerVal=null;
- }
- if (upperVal==null) {
- includeUpper=true;
- }
-
-
- this.fieldName = fieldName.intern(); // intern it, just like terms...
- this.lowerVal = lowerVal;
- this.upperVal = upperVal;
- this.includeLower = includeLower;
- this.includeUpper = includeUpper;
+ // NOTE(review): this constructor used to normalize open-ended and empty endpoints
+ // itself; presumably the RangeQuery constructor now covers that -- confirm.
+ super(fieldName, lowerVal, upperVal, includeLower, includeUpper);
+ // This deprecated class always asks for constant-score rewriting.
+ this.constantScoreRewrite = true;
}
+ /**
+ * Creates a range query with the constantScoreRewrite flag switched on, passing
+ * the given collator along with the other arguments to the {@link RangeQuery}
+ * constructor.
+ *
+ * @param fieldName the field name for this query
+ * @param lowerVal the value of the lower endpoint of the range
+ * @param upperVal the value of the upper endpoint of the range
+ * @param includeLower whether the lower endpoint is inclusive
+ * @param includeUpper whether the upper endpoint is inclusive
+ * @param collator forwarded to RangeQuery; presumably used to compare terms
+ * against the endpoints (TODO confirm)
+ */
public ConstantScoreRangeQuery(String fieldName, String lowerVal,
String upperVal, boolean includeLower,
- boolean includeUpper, Collator collator)
- {
- this(fieldName, lowerVal, upperVal, includeLower, includeUpper);
- this.collator = collator;
+ boolean includeUpper, Collator collator) {
+ super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator);
+ // This deprecated class always asks for constant-score rewriting.
+ this.constantScoreRewrite = true;
}
- /** Returns the field name for this query */
- public String getField() { return fieldName; }
- /** Returns the value of the lower endpoint of this range query, null if open ended */
- public String getLowerVal() { return lowerVal; }
- /** Returns the value of the upper endpoint of this range query, null if open ended */
- public String getUpperVal() { return upperVal; }
- /** Returns true if the lower endpoint is inclusive */
- public boolean includesLower() { return includeLower; }
- /** Returns true if the upper endpoint is inclusive */
- public boolean includesUpper() { return includeUpper; }
-
- public Query rewrite(IndexReader reader) throws IOException {
- // Map to RangeFilter semantics which are slightly different...
- RangeFilter rangeFilt = new RangeFilter
- (fieldName, lowerVal != null?lowerVal:"", upperVal,
- lowerVal==""?false:includeLower, upperVal==null?false:includeUpper,
- collator);
- Query q = new ConstantScoreQuery(rangeFilt);
- q.setBoost(getBoost());
- return q;
+ /**
+ * Returns the value of the lower endpoint of this range query, as reported by
+ * {@link #getLowerTermText()}.
+ * NOTE(review): this accessor was previously documented as returning null for an
+ * open-ended range; confirm getLowerTermText() keeps that behavior.
+ */
+ public String getLowerVal() {
+ return getLowerTermText();
}
- /** Prints a user-readable version of this query. */
- public String toString(String field)
- {
- StringBuffer buffer = new StringBuffer();
- if (!getField().equals(field))
- {
- buffer.append(getField());
- buffer.append(":");
- }
- buffer.append(includeLower ? '[' : '{');
- buffer.append(lowerVal != null ? lowerVal : "*");
- buffer.append(" TO ");
- buffer.append(upperVal != null ? upperVal : "*");
- buffer.append(includeUpper ? ']' : '}');
- if (getBoost() != 1.0f)
- {
- buffer.append("^");
- buffer.append(Float.toString(getBoost()));
- }
- return buffer.toString();
- }
-
- /** Returns true if o is equal to this. */
- public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof ConstantScoreRangeQuery)) return false;
- ConstantScoreRangeQuery other = (ConstantScoreRangeQuery) o;
-
- if (this.fieldName != other.fieldName // interned comparison
- || this.includeLower != other.includeLower
- || this.includeUpper != other.includeUpper
- || (this.collator != null && ! this.collator.equals(other.collator))
- ) { return false; }
- if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
- if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
- return this.getBoost() == other.getBoost();
- }
-
- /** Returns a hash code value for this object.*/
- public int hashCode() {
- int h = Float.floatToIntBits(getBoost()) ^ fieldName.hashCode();
- // hashCode of "" is 0, so don't use that for null...
- h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a;
- // don't just XOR upperVal with out mixing either it or h, as it will cancel
- // out lowerVal if they are equal.
- h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix
- h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
- h ^= (includeLower ? 0x665599aa : 0)
- ^ (includeUpper ? 0x99aa5566 : 0);
- h ^= collator != null ? collator.hashCode() : 0;
- return h;
- }
+ /**
+ * Returns the value of the upper endpoint of this range query, as reported by
+ * {@link #getUpperTermText()}.
+ * NOTE(review): this accessor was previously documented as returning null for an
+ * open-ended range; confirm getUpperTermText() keeps that behavior.
+ */
+ public String getUpperVal() {
+ return getUpperTermText();
+ }
}
Index: src/java/org/apache/lucene/search/PrefixFilter.java
===================================================================
--- src/java/org/apache/lucene/search/PrefixFilter.java (revision 712531)
+++ src/java/org/apache/lucene/search/PrefixFilter.java (working copy)
@@ -17,25 +17,28 @@
* limitations under the License.
*/
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.TermDocs;
-
-import java.util.BitSet;
import java.io.IOException;
+import java.util.BitSet;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+
/**
- *
- * @version $Id$
+ * A Filter that restricts search results to values that have a matching prefix in a given
+ * field.
+ *
+ * This filter does not enumerate terms itself: it wraps a {@link PrefixQuery} and
+ * returns the results of that query's filter.
+ *
*/ public class PrefixFilter extends Filter { protected final Term prefix; + private PrefixQuery prefixQuery; public PrefixFilter(Term prefix) { this.prefix = prefix; + this.prefixQuery = new PrefixQuery(prefix); } public Term getPrefix() { return prefix; } @@ -44,23 +47,11 @@ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. */ public BitSet bits(IndexReader reader) throws IOException { - final BitSet bitSet = new BitSet(reader.maxDoc()); - new PrefixGenerator(prefix) { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; + return prefixQuery.getFilter().bits(reader); } public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - new PrefixGenerator(prefix) { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; + return prefixQuery.getFilter().getDocIdSet(reader); } /** Prints a user-readable version of this query. */ @@ -71,52 +62,8 @@ buffer.append(")"); return buffer.toString(); } -} -// keep this protected until I decide if it's a good way -// to separate id generation from collection (or should -// I just reuse hitcollector???) 
-interface IdGenerator { - public void generate(IndexReader reader) throws IOException; - public void handleDoc(int doc); } -abstract class PrefixGenerator implements IdGenerator { - protected final Term prefix; - PrefixGenerator(Term prefix) { - this.prefix = prefix; - } - - public void generate(IndexReader reader) throws IOException { - TermEnum enumerator = reader.terms(prefix); - TermDocs termDocs = reader.termDocs(); - - try { - - String prefixText = prefix.text(); - String prefixField = prefix.field(); - do { - Term term = enumerator.term(); - if (term != null && - term.text().startsWith(prefixText) && - term.field() == prefixField) // interned comparison - { - termDocs.seek(term); - while (termDocs.next()) { - handleDoc(termDocs.doc()); - } - } else { - break; - } - } while (enumerator.next()); - } finally { - termDocs.close(); - enumerator.close(); - } - } -} - - - Index: src/java/org/apache/lucene/search/WildcardTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/WildcardTermEnum.java (revision 712531) +++ src/java/org/apache/lucene/search/WildcardTermEnum.java (working copy) @@ -40,9 +40,7 @@ boolean endEnum = false; /** - * Creates a newWildcardTermEnum. Passing in a
- * {@link org.apache.lucene.index.Term Term} that does not contain a
- * WILDCARD_CHAR will cause an exception to be thrown.
+ * Creates a new WildcardTermEnum.
* * After calling the constructor the enumeration is already pointing to the first * valid term if such a term exists. @@ -62,8 +60,12 @@ else if (cidx >= 0) { idx = Math.min(idx, cidx); } + if (idx != -1) { + pre = searchTerm.text().substring(0,idx); + } else { + pre = ""; + } - pre = searchTerm.text().substring(0,idx); preLen = pre.length(); text = text.substring(preLen); setEnum(reader.terms(new Term(searchTerm.field(), pre))); Index: src/java/org/apache/lucene/search/WildcardQuery.java =================================================================== --- src/java/org/apache/lucene/search/WildcardQuery.java (revision 712531) +++ src/java/org/apache/lucene/search/WildcardQuery.java (working copy) @@ -48,12 +48,11 @@ return false; } - + public Query rewrite(IndexReader reader) throws IOException { - if (this.termContainsWildcard) { - return super.rewrite(reader); - } - + if (!termContainsWildcard) return new TermQuery(getTerm()); + else + return super.rewrite(reader); } } Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 712531) +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -44,7 +44,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; -import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; @@ -52,6 +51,7 @@ import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.FilteredQuery; import 
org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; @@ -462,14 +462,8 @@ public void testGetConstantScoreRangeFragments() throws Exception { numHighlights = 0; - String queryString = FIELD_NAME + ":[kannedy TO kznnedy]"; - // Need to explicitly set the QueryParser property to use RangeQuery - // rather - // than RangeFilters - QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); - // parser.setUseOldRangeQuery(true); - query = parser.parse(queryString); + query = new ConstantScoreRangeQuery(FIELD_NAME, "kannedy", "kznnedy", true, true); searcher = new IndexSearcher(ramDir); // can't rewrite ConstantScoreRangeQuery if you want to highlight it - @@ -1043,6 +1037,7 @@ searchers[1] = new IndexSearcher(ramDir2); MultiSearcher multiSearcher = new MultiSearcher(searchers); QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); + parser.setConstantScoreRewrite(false); query = parser.parse("multi*"); System.out.println("Searching for: " + query.toString(FIELD_NAME)); // at this point the multisearcher calls combine(query[]) @@ -1343,6 +1338,7 @@ public void doSearching(String queryString) throws Exception { QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer()); + parser.setConstantScoreRewrite(false); query = parser.parse(queryString); doSearching(query); }