Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 982481) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -189,12 +189,12 @@ assertFalse("queries with different inclusive are not equal", query.equals(other)); query = new TermRangeQuery("content", "A", "C", false, false); - other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance()); + other = new CollatedTermRangeQuery("content", "A", "C", false, false, Collator.getInstance()); assertFalse("a query with a collator is not equal to one without", query.equals(other)); } public void testExclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); + Query query = new CollatedTermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -215,7 +215,7 @@ } public void testInclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); + Query query = new CollatedTermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); @@ -241,7 +241,7 @@ // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi // characters properly. 
Collator collator = Collator.getInstance(new Locale("ar")); - Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator); + Query query = new CollatedTermRangeQuery("content", "\u062F", "\u0698", true, true, collator); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeQuery with a Farsi @@ -251,7 +251,7 @@ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); - query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator); + query = new CollatedTermRangeQuery("content", "\u0633", "\u0638",true, true, collator); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); searcher.close(); @@ -262,7 +262,7 @@ // Danish collation orders the words below in the given order (example taken // from TestSort.testInternationalSort() ). String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; - Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); + Query query = new CollatedTermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], // but Danish collation does. 
@@ -271,7 +271,7 @@ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); - query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); + query = new CollatedTermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); searcher.close(); Index: lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 982481) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -99,7 +99,7 @@ /** macro for readability */ public static Query csrq(String f, String l, String h, boolean il, boolean ih, Collator c) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); + CollatedTermRangeQuery query = new CollatedTermRangeQuery(f, l, h, il, ih, c); query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } Index: lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (revision 982481) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (working copy) @@ -161,84 +161,84 @@ Query q = new TermQuery(new Term("body", "body")); // test id, bounded on both ends - int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, + int numHits = search.search(q, new CollatedTermRangeFilter("id", minIP, maxIP, T, T, c), 1000).totalHits; assertEquals("find all", numDocs, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, maxIP, T, F, c), 
1000).totalHits; assertEquals("all but last", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; assertEquals("all but first", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; assertEquals("all but ends", numDocs - 2, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; assertEquals("med and up", 1 + maxId - medId, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; assertEquals("up to med", 1 + medId - minId, numHits); // unbounded id - numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", minIP, null, T, F, c), 1000).totalHits; assertEquals("min and up", numDocs, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", null, maxIP, F, T, c), 1000).totalHits; assertEquals("max and down", numDocs, numHits); - numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", minIP, null, F, F, c), 1000).totalHits; assertEquals("not min, but up", numDocs - 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", null, maxIP, F, F, c), 1000).totalHits; assertEquals("not max, but down", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, F, 
c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits; assertEquals("med and up, not max", maxId - medId, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; assertEquals("not min, up to med", medId - minId, numHits); // very small sets numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; assertEquals("min,min,F,F", 0, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; assertEquals("med,med,F,F", 0, numHits); numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; assertEquals("max,max,F,F", 0, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; assertEquals("min,min,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", null, minIP, F, T, c), 1000).totalHits; assertEquals("nul,min,F,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; assertEquals("max,max,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", maxIP, null, T, F, c), 1000).totalHits; assertEquals("max,nul,T,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, T, T, 
c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits; assertEquals("med,med,T,T", 1, numHits); } @@ -337,61 +337,61 @@ // test extremes, bounded on both ends - int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, + int numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, T, T, c), 1000).totalHits; assertEquals("find all", numDocs, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, T, F, c), 1000).totalHits; assertEquals("all but biggest", numDocs - 1, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, F, T, c), 1000).totalHits; assertEquals("all but smallest", numDocs - 1, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, F, F, c), 1000).totalHits; assertEquals("all but extremes", numDocs - 2, numHits); // unbounded numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; assertEquals("smallest and up", numDocs, numHits); numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; assertEquals("biggest and down", numDocs, numHits); numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; assertEquals("not smallest, but up", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", null, maxRP, 
F, F, c), 1000).totalHits; assertEquals("not biggest, but down", numDocs - 1, numHits); // very small sets - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, minRP, F, F, c), 1000).totalHits; assertEquals("min,min,F,F", 0, numHits); - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", maxRP, maxRP, F, F, c), 1000).totalHits; assertEquals("max,max,F,F", 0, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, minRP, T, T, c), 1000).totalHits; assertEquals("min,min,T,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; assertEquals("nul,min,F,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T, + numHits = search.search(q, new CollatedTermRangeFilter("rand", maxRP, maxRP, T, T, c), 1000).totalHits; assertEquals("max,max,T,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; assertEquals("max,nul,T,T", 1, numHits); } @@ -423,11 +423,11 @@ // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi is not supported). 
- int numHits = search.search(q, new TermRangeFilter("content", "\u062F", + int numHits = search.search(q, new CollatedTermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000).totalHits; assertEquals("The index Term should not be included.", 0, numHits); - numHits = search.search(q, new TermRangeFilter("content", "\u0633", + numHits = search.search(q, new CollatedTermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000).totalHits; assertEquals("The index Term should be included.", 1, numHits); search.close(); @@ -461,11 +461,11 @@ // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], // but Danish collation does. - int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T", + int numHits = search.search(q, new CollatedTermRangeFilter("content", "H\u00D8T", "MAND", F, F, collator), 1000).totalHits; assertEquals("The index Term should be included.", 1, numHits); - numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T", + numHits = search.search(q, new CollatedTermRangeFilter("content", "H\u00C5T", "MAND", F, F, collator), 1000).totalHits; assertEquals("The index Term should not be included.", 0, numHits); search.close(); Index: lucene/src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (revision 982481) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -882,7 +883,9 @@ * @return new TermRangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, 
boolean inclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + final MultiTermQuery query = rangeCollator == null ? + new TermRangeQuery(field, part1, part2, inclusive, inclusive) : + new CollatedTermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); query.setRewriteMethod(multiTermRewriteMethod); return query; } @@ -1569,16 +1572,6 @@ finally { jj_save(0, xla); } } - private boolean jj_3_1() { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_2()) { - jj_scanpos = xsp; - if (jj_3R_3()) return true; - } - return false; - } - private boolean jj_3R_3() { if (jj_scan_token(STAR)) return true; if (jj_scan_token(COLON)) return true; @@ -1591,6 +1584,16 @@ return false; } + private boolean jj_3_1() { + Token xsp; + xsp = jj_scanpos; + if (jj_3R_2()) { + jj_scanpos = xsp; + if (jj_3R_3()) return true; + } + return false; + } + /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ Index: lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 982481) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -47,6 +47,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -906,7 +907,9 @@ * @return new TermRangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + final MultiTermQuery query = rangeCollator == null ? 
+ new TermRangeQuery(field, part1, part2, inclusive, inclusive) : + new CollatedTermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); query.setRewriteMethod(multiTermRewriteMethod); return query; } Index: lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 982481) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; Index: lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 982481) +++ lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.text.Collator; import java.util.Comparator; import org.apache.lucene.index.IndexReader; @@ -32,8 +31,6 @@ * greater than all that precede it.

*/ public class TermRangeTermsEnum extends FilteredTermsEnum { - - private Collator collator; private String upperTermText; private String lowerTermText; private boolean includeLower; @@ -62,17 +59,12 @@ * If true, the lowerTerm is included in the range. * @param includeUpper * If true, the upperTerm is included in the range. - * @param collator - * The collator to use to collate index Terms, to determine their - * membership in the range bounded by lowerTerm and - * upperTerm. * * @throws IOException */ public TermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText, - boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + boolean includeLower, boolean includeUpper) throws IOException { super(reader, field); - this.collator = collator; this.upperTermText = upperTermText; this.lowerTermText = lowerTermText; this.includeLower = includeLower; @@ -93,40 +85,27 @@ upperBytesRef = new BytesRef(upperTermText); } - BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef(""); + BytesRef startBytesRef = lowerBytesRef; setInitialSeekTerm(startBytesRef); termComp = getComparator(); } @Override protected AcceptStatus accept(BytesRef term) { - if (collator == null) { - if (!this.includeLower && term.equals(lowerBytesRef)) - return AcceptStatus.NO; - // Use this field's default sort ordering - if (upperBytesRef != null) { - final int cmp = termComp.compare(upperBytesRef, term); - /* - * if beyond the upper term, or is exclusive and this is equal to - * the upper term, break out - */ - if ((cmp < 0) || - (!includeUpper && cmp==0)) { - return AcceptStatus.END; - } - } - return AcceptStatus.YES; - } else { - if ((includeLower - ? collator.compare(term.utf8ToString(), lowerTermText) >= 0 - : collator.compare(term.utf8ToString(), lowerTermText) > 0) - && (upperTermText == null - || (includeUpper - ? 
collator.compare(term.utf8ToString(), upperTermText) <= 0 - : collator.compare(term.utf8ToString(), upperTermText) < 0))) { - return AcceptStatus.YES; - } + if (!this.includeLower && term.equals(lowerBytesRef)) return AcceptStatus.NO; + // Use this field's default sort ordering + if (upperBytesRef != null) { + final int cmp = termComp.compare(upperBytesRef, term); + /* + * if beyond the upper term, or is exclusive and this is equal to + * the upper term, break out + */ + if ((cmp < 0) || + (!includeUpper && cmp==0)) { + return AcceptStatus.END; + } } + return AcceptStatus.YES; } } Index: lucene/src/java/org/apache/lucene/search/TermRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (revision 982481) +++ lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (working copy) @@ -17,8 +17,6 @@ * limitations under the License. */ -import java.text.Collator; - /** * A Filter that restricts search results to a range of term * values in a given field. @@ -50,29 +48,6 @@ } /** - * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The lower bound on this range - * @param upperTerm The upper bound on this range - * @param includeLower Does this range include the lower bound? - * @param includeUpper Does this range include the upper bound? - * @param collator The collator to use when determining range inclusion; set - * to null to use Unicode code point ordering instead of collation. 
- * @throws IllegalArgumentException if both terms are null or if - * lowerTerm is null and includeLower is true (similar for upperTerm - * and includeUpper) - */ - public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm, - boolean includeLower, boolean includeUpper, - Collator collator) { - super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); - } - - /** * Constructs a filter for field fieldName matching * less than or equal to upperTerm. */ @@ -99,7 +74,4 @@ /** Returns true if the upper endpoint is inclusive */ public boolean includesUpper() { return query.includesUpper(); } - - /** Returns the collator used to determine range inclusion, if any. */ - public Collator getCollator() { return query.getCollator(); } } Index: lucene/src/java/org/apache/lucene/search/CollatedTermRangeTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CollatedTermRangeTermsEnum.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/CollatedTermRangeTermsEnum.java (revision 0) @@ -0,0 +1,101 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; + +/** + * Subclass of FilteredTermsEnum for enumerating all terms that match the + * specified range parameters. + *

Term enumerations are always ordered by + * {@link #getComparator}. Each term in the enumeration is + * greater than all that precede it.

+ */ +public class CollatedTermRangeTermsEnum extends FilteredTermsEnum { + private Collator collator; + private String upperTermText; + private String lowerTermText; + private boolean includeLower; + private boolean includeUpper; + + /** + * Enumerates all terms greater/equal than lowerTerm + * but less/equal than upperTerm. + * + * If an endpoint is null, it is said to be "open". Either or both + * endpoints may be open. Open endpoints may not be exclusive + * (you can't select all but the first or last term without + * explicitly specifying the term to exclude.) + * + * @param reader + * @param field + * An interned field that holds both lower and upper terms. + * @param lowerTermText + * The term text at the lower end of the range + * @param upperTermText + * The term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is included in the range. + * @param includeUpper + * If true, the upperTerm is included in the range. + * @param collator + * The collator to use to collate index Terms, to determine their + * membership in the range bounded by lowerTerm and + * upperTerm. + * + * @throws IOException + */ + public CollatedTermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText, + boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + super(reader, field); + this.collator = collator; + this.upperTermText = upperTermText; + this.lowerTermText = lowerTermText; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + + // do a little bit of normalization... + // open ended range queries should always be inclusive. + if (this.lowerTermText == null) { + this.lowerTermText = ""; + this.includeLower = true; + } + + // TODO: optimize + BytesRef startBytesRef = new BytesRef(""); + setInitialSeekTerm(startBytesRef); + } + + @Override + protected AcceptStatus accept(BytesRef term) { + if ((includeLower + ? 
collator.compare(term.utf8ToString(), lowerTermText) >= 0 + : collator.compare(term.utf8ToString(), lowerTermText) > 0) + && (upperTermText == null + || (includeUpper + ? collator.compare(term.utf8ToString(), upperTermText) <= 0 + : collator.compare(term.utf8ToString(), upperTermText) < 0))) { + return AcceptStatus.YES; + } + return AcceptStatus.NO; + } +} Property changes on: lucene\src\java\org\apache\lucene\search\CollatedTermRangeTermsEnum.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/search/CollatedTermRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CollatedTermRangeFilter.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/CollatedTermRangeFilter.java (revision 0) @@ -0,0 +1,68 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.Collator; + +/** + * A Filter that restricts search results to a range of term + * values in a given field. + * + *

This filter matches the documents looking for terms that fall into the + * supplied range according to {@link + * String#compareTo(String)}, unless a Collator is provided. It is not intended + * for numerical ranges; use {@link NumericRangeFilter} instead. + * + *

If you construct a large number of range filters with different ranges but on the + * same field, {@link FieldCacheRangeFilter} may have significantly better performance. + * @since 4.0 + */ +public class CollatedTermRangeFilter extends MultiTermQueryWrapperFilter { + /** + * + * @param lowerTerm The lower bound on this range + * @param upperTerm The upper bound on this range + * @param includeLower Does this range include the lower bound? + * @param includeUpper Does this range include the upper bound? + * @param collator The collator to use when determining range inclusion; set + * to null to use Unicode code point ordering instead of collation. + * @throws IllegalArgumentException if both terms are null or if + * lowerTerm is null and includeLower is true (similar for upperTerm + * and includeUpper) + */ + public CollatedTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, + boolean includeLower, boolean includeUpper, + Collator collator) { + super(new CollatedTermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); + } + + /** Returns the lower value of this range filter */ + public String getLowerTerm() { return query.getLowerTerm(); } + + /** Returns the upper value of this range filter */ + public String getUpperTerm() { return query.getUpperTerm(); } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return query.includesLower(); } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return query.includesUpper(); } + + /** Returns the collator used to determine range inclusion, if any. 
*/ + public Collator getCollator() { return query.getCollator(); } +} Property changes on: lucene\src\java\org\apache\lucene\search\CollatedTermRangeFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 982481) +++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.text.Collator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; @@ -43,11 +42,9 @@ public class TermRangeQuery extends MultiTermQuery { private String lowerTerm; private String upperTerm; - private Collator collator; private boolean includeLower; private boolean includeUpper; - /** * Constructs a query selecting all terms greater/equal than lowerTerm * but less/equal than upperTerm. @@ -71,47 +68,11 @@ * included in the range. */ public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { - this(field, lowerTerm, upperTerm, includeLower, includeUpper, null); - } - - /** Constructs a query selecting all terms greater/equal than - * lowerTerm but less/equal than upperTerm. - *

- * If an endpoint is null, it is said - * to be "open". Either or both endpoints may be open. Open endpoints may not - * be exclusive (you can't select all but the first or last term without - * explicitly specifying the term to exclude.) - *

- * If collator is not null, it will be used to decide whether - * index terms are within the given range, rather than using the Unicode code - * point order in which index terms are stored. - *

- * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The Term text at the lower end of the range - * @param upperTerm The Term text at the upper end of the range - * @param includeLower - * If true, the lowerTerm is - * included in the range. - * @param includeUpper - * If true, the upperTerm is - * included in the range. - * @param collator The collator to use to collate index Terms, to determine - * their membership in the range bounded by lowerTerm and - * upperTerm. - */ - public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, - Collator collator) { super(field); this.lowerTerm = lowerTerm; this.upperTerm = upperTerm; this.includeLower = includeLower; this.includeUpper = includeUpper; - this.collator = collator; } /** Returns the lower value of this range query */ @@ -125,23 +86,20 @@ /** Returns true if the upper endpoint is inclusive */ public boolean includesUpper() { return includeUpper; } - - /** Returns the collator used to determine range inclusion, if any. */ - public Collator getCollator() { return collator; } @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { + if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { return TermsEnum.EMPTY; } - if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) { + if ((lowerTerm == null || (includeLower && "".equals(lowerTerm))) && upperTerm == null) { // NOTE: debateably, the caller should never pass in a // multi reader... 
final Terms terms = MultiFields.getTerms(reader, field); return (terms != null) ? terms.iterator() : null; } return new TermRangeTermsEnum(reader, field, - lowerTerm, upperTerm, includeLower, includeUpper, collator); + lowerTerm, upperTerm, includeLower, includeUpper); } /** @deprecated */ @@ -171,7 +129,6 @@ public int hashCode() { final int prime = 31; int result = super.hashCode(); - result = prime * result + ((collator == null) ? 0 : collator.hashCode()); result = prime * result + (includeLower ? 1231 : 1237); result = prime * result + (includeUpper ? 1231 : 1237); result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); @@ -188,11 +145,6 @@ if (getClass() != obj.getClass()) return false; TermRangeQuery other = (TermRangeQuery) obj; - if (collator == null) { - if (other.collator != null) - return false; - } else if (!collator.equals(other.collator)) - return false; if (includeLower != other.includeLower) return false; if (includeUpper != other.includeUpper) @@ -209,5 +161,4 @@ return false; return true; } - } Index: lucene/src/java/org/apache/lucene/search/CollatedTermRangeQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CollatedTermRangeQuery.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/CollatedTermRangeQuery.java (revision 0) @@ -0,0 +1,175 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ToStringUtils; + +/** + * A Query that matches documents within a range of terms. + * + *

This query matches the documents looking for terms that fall into the + * supplied range according to {@link + * String#compareTo(String)}, unless a Collator is provided. It is not intended + * for numerical ranges; use {@link NumericRangeQuery} instead. + * + *

This query uses the {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * rewrite method. + * @since 4.0 + */ +public class CollatedTermRangeQuery extends MultiTermQuery { + private String lowerTerm; + private String upperTerm; + private boolean includeLower; + private boolean includeUpper; + private Collator collator; + + /** Constructs a query selecting all terms greater/equal than + * lowerTerm but less/equal than upperTerm. + *

+ * If an endpoint is null, it is said + * to be "open". Either or both endpoints may be open. Open endpoints may not + * be exclusive (you can't select all but the first or last term without + * explicitly specifying the term to exclude.) + *

+ * + * @param lowerTerm The Term text at the lower end of the range + * @param upperTerm The Term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is + * included in the range. + * @param includeUpper + * If true, the upperTerm is + * included in the range. + * @param collator The collator to use to collate index Terms, to determine + * their membership in the range bounded by lowerTerm and + * upperTerm. + */ + public CollatedTermRangeQuery(String field, String lowerTerm, String upperTerm, + boolean includeLower, boolean includeUpper, Collator collator) { + super(field); + this.lowerTerm = lowerTerm; + this.upperTerm = upperTerm; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.collator = collator; + } + + /** Returns the lower value of this range query */ + public String getLowerTerm() { return lowerTerm; } + + /** Returns the upper value of this range query */ + public String getUpperTerm() { return upperTerm; } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return includeLower; } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return includeUpper; } + + /** Returns the collator used to determine range inclusion */ + public Collator getCollator() { return collator; } + + @Override + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) { + return TermsEnum.EMPTY; + } + if (lowerTerm == null && upperTerm == null) { + // NOTE: debateably, the caller should never pass in a + // multi reader... + final Terms terms = MultiFields.getTerms(reader, field); + return (terms != null) ? 
terms.iterator() : null; + } + return new CollatedTermRangeTermsEnum(reader, field, + lowerTerm, upperTerm, includeLower, includeUpper, collator); + } + + /** @deprecated */ + @Deprecated + public String field() { + return getField(); + } + + /** Prints a user-readable version of this query. */ + @Override + public String toString(String field) { + StringBuilder buffer = new StringBuilder(); + if (!getField().equals(field)) { + buffer.append(getField()); + buffer.append(":"); + } + buffer.append(includeLower ? '[' : '{'); + buffer.append(lowerTerm != null ? lowerTerm : "*"); + buffer.append(" TO "); + buffer.append(upperTerm != null ? upperTerm : "*"); + buffer.append(includeUpper ? ']' : '}'); + buffer.append(ToStringUtils.boost(getBoost())); + return buffer.toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((collator == null) ? 0 : collator.hashCode()); + result = prime * result + (includeLower ? 1231 : 1237); + result = prime * result + (includeUpper ? 1231 : 1237); + result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); + result = prime * result + ((upperTerm == null) ? 
0 : upperTerm.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + CollatedTermRangeQuery other = (CollatedTermRangeQuery) obj; + if (collator == null) { + if (other.collator != null) + return false; + } else if (!collator.equals(other.collator)) + return false; + if (includeLower != other.includeLower) + return false; + if (includeUpper != other.includeUpper) + return false; + if (lowerTerm == null) { + if (other.lowerTerm != null) + return false; + } else if (!lowerTerm.equals(other.lowerTerm)) + return false; + if (upperTerm == null) { + if (other.upperTerm != null) + return false; + } else if (!upperTerm.equals(other.upperTerm)) + return false; + return true; + } +} Property changes on: lucene\src\java\org\apache\lucene\search\CollatedTermRangeQuery.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (revision 982481) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (working copy) @@ -17,17 +17,20 @@ * limitations under the License. 
*/ +import java.text.Collator; + import org.apache.lucene.queryParser.core.QueryNodeException; import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode.CompareOperator; import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute; import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.TermRangeQuery; /** - * Builds a {@link TermRangeQuery} object from a {@link RangeQueryNode} object. + * Builds a {@link MultiTermQuery} object from a {@link RangeQueryNode} object. */ public class RangeQueryNodeBuilder implements StandardQueryBuilder { @@ -35,7 +38,7 @@ // empty constructor } - public TermRangeQuery build(QueryNode queryNode) throws QueryNodeException { + public MultiTermQuery build(QueryNode queryNode) throws QueryNodeException { RangeQueryNode rangeNode = (RangeQueryNode) queryNode; ParametricQueryNode upper = rangeNode.getUpperBound(); ParametricQueryNode lower = rangeNode.getLowerBound(); @@ -53,9 +56,10 @@ String field = rangeNode.getField().toString(); - TermRangeQuery rangeQuery = new TermRangeQuery(field, lower - .getTextAsString(), upper.getTextAsString(), lowerInclusive, - upperInclusive, rangeNode.getCollator()); + Collator collator = rangeNode.getCollator(); + MultiTermQuery rangeQuery = collator == null ? 
+ new TermRangeQuery(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive) : + new CollatedTermRangeQuery(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive, collator); MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod)queryNode.getTag(MultiTermRewriteMethodAttribute.TAG_ID); if (method != null) { Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (revision 982481) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.text.Collator; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -29,6 +30,7 @@ import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -174,8 +176,10 @@ if (isPass2ResolvingPhrases) { // Must use old-style RangeQuery in order to produce a BooleanQuery // that can be turned into SpanOr clause - TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive, - getRangeCollator()); + Collator collator = getRangeCollator(); + MultiTermQuery rangeQuery = collator == null ? 
+ new TermRangeQuery(field, part1, part2, inclusive, inclusive) : + new CollatedTermRangeQuery(field, part1, part2, inclusive, inclusive, collator); rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return rangeQuery; } Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 982481) +++ lucene/MIGRATE.txt (working copy) @@ -266,3 +266,7 @@ Likewise for DocsAndPositionsEnum. +LUCENE-2514: The option to use a Collator's order (instead of Unicode order) for +TermRangeQuery/Filter has been moved to CollatedTermRangeQuery/Filter. +Note: this functionality isn't very scalable; if you are using it, consider +indexing collation keys with the collation support in the analysis module instead.