Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 982481) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -189,12 +189,12 @@ assertFalse("queries with different inclusive are not equal", query.equals(other)); query = new TermRangeQuery("content", "A", "C", false, false); - other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance()); + other = new CollatedTermRangeQuery("content", "A", "C", false, false, Collator.getInstance()); assertFalse("a query with a collator is not equal to one without", query.equals(other)); } public void testExclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); + Query query = new CollatedTermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -215,7 +215,7 @@ } public void testInclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); + Query query = new CollatedTermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); @@ -241,7 +241,7 @@ // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi // characters properly. 
Collator collator = Collator.getInstance(new Locale("ar")); - Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator); + Query query = new CollatedTermRangeQuery("content", "\u062F", "\u0698", true, true, collator); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeQuery with a Farsi @@ -251,7 +251,7 @@ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); - query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator); + query = new CollatedTermRangeQuery("content", "\u0633", "\u0638",true, true, collator); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); searcher.close(); @@ -262,7 +262,7 @@ // Danish collation orders the words below in the given order (example taken // from TestSort.testInternationalSort() ). String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; - Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); + Query query = new CollatedTermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], // but Danish collation does. 
@@ -271,7 +271,7 @@ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); - query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); + query = new CollatedTermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); searcher.close(); Index: lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 982481) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -99,7 +99,7 @@ /** macro for readability */ public static Query csrq(String f, String l, String h, boolean il, boolean ih, Collator c) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); + CollatedTermRangeQuery query = new CollatedTermRangeQuery(f, l, h, il, ih, c); query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } Index: lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (revision 982481) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (working copy) @@ -161,84 +161,84 @@ Query q = new TermQuery(new Term("body", "body")); // test id, bounded on both ends - int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, + int numHits = search.search(q, new CollatedTermRangeFilter("id", minIP, maxIP, T, T, c), 1000).totalHits; assertEquals("find all", numDocs, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, maxIP, T, F, c), 
1000).totalHits; assertEquals("all but last", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; assertEquals("all but first", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; assertEquals("all but ends", numDocs - 2, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; assertEquals("med and up", 1 + maxId - medId, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; assertEquals("up to med", 1 + medId - minId, numHits); // unbounded id - numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", minIP, null, T, F, c), 1000).totalHits; assertEquals("min and up", numDocs, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", null, maxIP, F, T, c), 1000).totalHits; assertEquals("max and down", numDocs, numHits); - numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", minIP, null, F, F, c), 1000).totalHits; assertEquals("not min, but up", numDocs - 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", null, maxIP, F, F, c), 1000).totalHits; assertEquals("not max, but down", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, F, 
c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits; assertEquals("med and up, not max", maxId - medId, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; assertEquals("not min, up to med", medId - minId, numHits); // very small sets numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; assertEquals("min,min,F,F", 0, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; assertEquals("med,med,F,F", 0, numHits); numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; assertEquals("max,max,F,F", 0, numHits); numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; assertEquals("min,min,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", null, minIP, F, T, c), 1000).totalHits; assertEquals("nul,min,F,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; + new CollatedTermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; assertEquals("max,max,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c), + numHits = search.search(q, new CollatedTermRangeFilter("id", maxIP, null, T, F, c), 1000).totalHits; assertEquals("max,nul,T,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, T, T, 
c), 1000).totalHits; + new CollatedTermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits; assertEquals("med,med,T,T", 1, numHits); } @@ -337,61 +337,61 @@ // test extremes, bounded on both ends - int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, + int numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, T, T, c), 1000).totalHits; assertEquals("find all", numDocs, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, T, F, c), 1000).totalHits; assertEquals("all but biggest", numDocs - 1, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, F, T, c), 1000).totalHits; assertEquals("all but smallest", numDocs - 1, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, maxRP, F, F, c), 1000).totalHits; assertEquals("all but extremes", numDocs - 2, numHits); // unbounded numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; assertEquals("smallest and up", numDocs, numHits); numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; assertEquals("biggest and down", numDocs, numHits); numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; assertEquals("not smallest, but up", numDocs - 1, numHits); numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", null, maxRP, 
F, F, c), 1000).totalHits; assertEquals("not biggest, but down", numDocs - 1, numHits); // very small sets - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, minRP, F, F, c), 1000).totalHits; assertEquals("min,min,F,F", 0, numHits); - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, + numHits = search.search(q, new CollatedTermRangeFilter("rand", maxRP, maxRP, F, F, c), 1000).totalHits; assertEquals("max,max,F,F", 0, numHits); - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T, + numHits = search.search(q, new CollatedTermRangeFilter("rand", minRP, minRP, T, T, c), 1000).totalHits; assertEquals("min,min,T,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; assertEquals("nul,min,F,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T, + numHits = search.search(q, new CollatedTermRangeFilter("rand", maxRP, maxRP, T, T, c), 1000).totalHits; assertEquals("max,max,T,T", 1, numHits); numHits = search.search(q, - new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; + new CollatedTermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; assertEquals("max,nul,T,T", 1, numHits); } @@ -423,11 +423,11 @@ // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi is not supported). 
- int numHits = search.search(q, new TermRangeFilter("content", "\u062F", + int numHits = search.search(q, new CollatedTermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000).totalHits; assertEquals("The index Term should not be included.", 0, numHits); - numHits = search.search(q, new TermRangeFilter("content", "\u0633", + numHits = search.search(q, new CollatedTermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000).totalHits; assertEquals("The index Term should be included.", 1, numHits); search.close(); @@ -461,11 +461,11 @@ // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], // but Danish collation does. - int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T", + int numHits = search.search(q, new CollatedTermRangeFilter("content", "H\u00D8T", "MAND", F, F, collator), 1000).totalHits; assertEquals("The index Term should be included.", 1, numHits); - numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T", + numHits = search.search(q, new CollatedTermRangeFilter("content", "H\u00C5T", "MAND", F, F, collator), 1000).totalHits; assertEquals("The index Term should not be included.", 0, numHits); search.close(); Index: lucene/src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (revision 982481) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -882,7 +883,9 @@ * @return new TermRangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, 
boolean inclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + final MultiTermQuery query = rangeCollator == null ? + new TermRangeQuery(field, part1, part2, inclusive, inclusive) : + new CollatedTermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); query.setRewriteMethod(multiTermRewriteMethod); return query; } @@ -1569,16 +1572,6 @@ finally { jj_save(0, xla); } } - private boolean jj_3_1() { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_2()) { - jj_scanpos = xsp; - if (jj_3R_3()) return true; - } - return false; - } - private boolean jj_3R_3() { if (jj_scan_token(STAR)) return true; if (jj_scan_token(COLON)) return true; @@ -1591,6 +1584,16 @@ return false; } + private boolean jj_3_1() { + Token xsp; + xsp = jj_scanpos; + if (jj_3R_2()) { + jj_scanpos = xsp; + if (jj_3R_3()) return true; + } + return false; + } + /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ Index: lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 982481) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -47,6 +47,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; @@ -906,7 +907,9 @@ * @return new TermRangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + final MultiTermQuery query = rangeCollator == null ? 
+ new TermRangeQuery(field, part1, part2, inclusive, inclusive) : + new CollatedTermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); query.setRewriteMethod(multiTermRewriteMethod); return query; } Index: lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 982481) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CollatedTermRangeQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MatchAllDocsQuery; Index: lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 982481) +++ lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.text.Collator; import java.util.Comparator; import org.apache.lucene.index.IndexReader; @@ -32,8 +31,6 @@ * greater than all that precede it.
*/ public class TermRangeTermsEnum extends FilteredTermsEnum { - - private Collator collator; private String upperTermText; private String lowerTermText; private boolean includeLower; @@ -62,17 +59,12 @@ * If true, thelowerTerm is included in the range.
* @param includeUpper
* If true, the upperTerm is included in the range.
- * @param collator
- * The collator to use to collate index Terms, to determine their
- * membership in the range bounded by lowerTerm and
- * upperTerm.
*
* @throws IOException
*/
public TermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText,
- boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+ boolean includeLower, boolean includeUpper) throws IOException {
super(reader, field);
- this.collator = collator;
this.upperTermText = upperTermText;
this.lowerTermText = lowerTermText;
this.includeLower = includeLower;
@@ -93,40 +85,27 @@
upperBytesRef = new BytesRef(upperTermText);
}
- BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef("");
+ BytesRef startBytesRef = lowerBytesRef;
setInitialSeekTerm(startBytesRef);
termComp = getComparator();
}
@Override
protected AcceptStatus accept(BytesRef term) {
- if (collator == null) {
- if (!this.includeLower && term.equals(lowerBytesRef))
- return AcceptStatus.NO;
- // Use this field's default sort ordering
- if (upperBytesRef != null) {
- final int cmp = termComp.compare(upperBytesRef, term);
- /*
- * if beyond the upper term, or is exclusive and this is equal to
- * the upper term, break out
- */
- if ((cmp < 0) ||
- (!includeUpper && cmp==0)) {
- return AcceptStatus.END;
- }
- }
- return AcceptStatus.YES;
- } else {
- if ((includeLower
- ? collator.compare(term.utf8ToString(), lowerTermText) >= 0
- : collator.compare(term.utf8ToString(), lowerTermText) > 0)
- && (upperTermText == null
- || (includeUpper
- ? collator.compare(term.utf8ToString(), upperTermText) <= 0
- : collator.compare(term.utf8ToString(), upperTermText) < 0))) {
- return AcceptStatus.YES;
- }
+ if (!this.includeLower && term.equals(lowerBytesRef))
return AcceptStatus.NO;
+ // Use this field's default sort ordering
+ if (upperBytesRef != null) {
+ final int cmp = termComp.compare(upperBytesRef, term);
+ /*
+ * if beyond the upper term, or is exclusive and this is equal to
+ * the upper term, break out
+ */
+ if ((cmp < 0) ||
+ (!includeUpper && cmp==0)) {
+ return AcceptStatus.END;
+ }
}
+ return AcceptStatus.YES;
}
}
Index: lucene/src/java/org/apache/lucene/search/TermRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (revision 982481)
+++ lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (working copy)
@@ -17,8 +17,6 @@
* limitations under the License.
*/
-import java.text.Collator;
-
/**
* A Filter that restricts search results to a range of term
* values in a given field.
@@ -50,29 +48,6 @@
}
/**
- * WARNING: Using this constructor and supplying a non-null
- * value in the collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The lower bound on this range
- * @param upperTerm The upper bound on this range
- * @param includeLower Does this range include the lower bound?
- * @param includeUpper Does this range include the upper bound?
- * @param collator The collator to use when determining range inclusion; set
- * to null to use Unicode code point ordering instead of collation.
- * @throws IllegalArgumentException if both terms are null or if
- * lowerTerm is null and includeLower is true (similar for upperTerm
- * and includeUpper)
- */
- public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
- boolean includeLower, boolean includeUpper,
- Collator collator) {
- super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
- }
-
- /**
* Constructs a filter for field fieldName matching
* less than or equal to upperTerm.
*/
@@ -99,7 +74,4 @@
/** Returns true if the upper endpoint is inclusive */
public boolean includesUpper() { return query.includesUpper(); }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return query.getCollator(); }
}
Index: lucene/src/java/org/apache/lucene/search/CollatedTermRangeTermsEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CollatedTermRangeTermsEnum.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/CollatedTermRangeTermsEnum.java (revision 0)
@@ -0,0 +1,101 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Subclass of FilteredTermsEnum for enumerating all terms that match the
+ * specified range parameters.
+ * Term enumerations are always ordered by + * {@link #getComparator}. Each term in the enumeration is + * greater than all that precede it.
+ */ +public class CollatedTermRangeTermsEnum extends FilteredTermsEnum { + private Collator collator; + private String upperTermText; + private String lowerTermText; + private boolean includeLower; + private boolean includeUpper; + + /** + * Enumerates all terms greater/equal thanlowerTerm
+ * but less/equal than upperTerm.
+ *
+ * If an endpoint is null, it is said to be "open". Either or both
+ * endpoints may be open. Open endpoints may not be exclusive
+ * (you can't select all but the first or last term without
+ * explicitly specifying the term to exclude.)
+ *
+ * @param reader
+ * @param field
+ * An interned field that holds both lower and upper terms.
+ * @param lowerTermText
+ * The term text at the lower end of the range
+ * @param upperTermText
+ * The term text at the upper end of the range
+ * @param includeLower
+ * If true, the lowerTerm is included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is included in the range.
+ * @param collator
+ * The collator to use to collate index Terms, to determine their
+ * membership in the range bounded by lowerTerm and
+ * upperTerm.
+ *
+ * @throws IOException
+ */
+  public CollatedTermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText,
+      boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+    super(reader, field);
+    this.collator = collator;
+
+    // Upper bound is stored as given; accept() treats a null upper term as an open end.
+    this.upperTermText = upperTermText;
+    this.includeUpper = includeUpper;
+
+    // Normalize an open lower end: it becomes the empty string and is always inclusive.
+    final boolean openLower = (lowerTermText == null);
+    this.lowerTermText = openLower ? "" : lowerTermText;
+    this.includeLower = openLower || includeLower;
+
+    // TODO: optimize. Enumeration currently always starts from the empty term
+    // instead of seeking to a position derived from the lower bound.
+    setInitialSeekTerm(new BytesRef(""));
+  }
+
+  /**
+   * Decides whether an index term lies inside the collated range.
+   * <p>
+   * The term is decoded to a String once and compared against the lower and
+   * (if present) upper bound using the collator. Terms outside the range are
+   * answered with {@code AcceptStatus.NO}; this method never returns
+   * {@code AcceptStatus.END}, so a non-matching term does not end the enumeration.
+   *
+   * @param term the current index term, as UTF-8 bytes
+   * @return {@code AcceptStatus.YES} if the term is within the bounds,
+   *         {@code AcceptStatus.NO} otherwise
+   */
+  @Override
+  protected AcceptStatus accept(BytesRef term) {
+    // Decode once per call: the collator compares Strings, and both bound checks
+    // need the same decoded text.
+    final String termText = term.utf8ToString();
+
+    final int lowerCmp = collator.compare(termText, lowerTermText);
+    final boolean aboveLower = includeLower ? lowerCmp >= 0 : lowerCmp > 0;
+    if (!aboveLower) {
+      return AcceptStatus.NO;
+    }
+
+    // A null upper bound means the range is open-ended at the top.
+    if (upperTermText == null) {
+      return AcceptStatus.YES;
+    }
+
+    final int upperCmp = collator.compare(termText, upperTermText);
+    final boolean belowUpper = includeUpper ? upperCmp <= 0 : upperCmp < 0;
+    return belowUpper ? AcceptStatus.YES : AcceptStatus.NO;
+  }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\CollatedTermRangeTermsEnum.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/CollatedTermRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CollatedTermRangeFilter.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/CollatedTermRangeFilter.java (revision 0)
@@ -0,0 +1,68 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.Collator;
+
+/**
+ * A Filter that restricts search results to a range of term
+ * values in a given field.
+ *
+ * This filter matches the documents looking for terms that fall into the
+ * supplied range according to the ordering of the supplied {@link Collator},
+ * rather than {@link String#compareTo(String)}. It is not intended
+ * for numerical ranges; use {@link NumericRangeFilter} instead.
+ *
+ * <p>If you construct a large number of range filters with different ranges but on the
+ * same field, {@link FieldCacheRangeFilter} may have significantly better performance.
+ * @since 4.0
+ */
+public class CollatedTermRangeFilter extends MultiTermQueryWrapperFilter
- * If an endpoint is null, it is said
- * to be "open". Either or both endpoints may be open. Open endpoints may not
- * be exclusive (you can't select all but the first or last term without
- * explicitly specifying the term to exclude.)
- *
- * If
- * WARNING: Using this constructor and supplying a non-null
- * value in the This query matches the documents looking for terms that fall into the
+ * supplied range according to {@link
+ * String#compareTo(String)}, unless a This query uses the {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * rewrite method.
+ * @since 4.0
+ */
+public class CollatedTermRangeQuery extends MultiTermQuery {
+ private String lowerTerm;
+ private String upperTerm;
+ private boolean includeLower;
+ private boolean includeUpper;
+ private Collator collator;
+
+ /** Constructs a query selecting all terms greater/equal than
+ *
+ * If an endpoint is null, it is said
+ * to be "open". Either or both endpoints may be open. Open endpoints may not
+ * be exclusive (you can't select all but the first or last term without
+ * explicitly specifying the term to exclude.)
+ *
+ *
+ * @param lowerTerm The Term text at the lower end of the range
+ * @param upperTerm The Term text at the upper end of the range
+ * @param includeLower
+ * If true, the true if the lower endpoint is inclusive */
+ public boolean includesLower() { return query.includesLower(); }
+
+ /** Returns true if the upper endpoint is inclusive */
+ public boolean includesUpper() { return query.includesUpper(); }
+
+ /** Returns the collator used to determine range inclusion, if any. */
+ public Collator getCollator() { return query.getCollator(); }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\CollatedTermRangeFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 982481)
+++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy)
@@ -18,7 +18,6 @@
*/
import java.io.IOException;
-import java.text.Collator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
@@ -43,11 +42,9 @@
public class TermRangeQuery extends MultiTermQuery {
private String lowerTerm;
private String upperTerm;
- private Collator collator;
private boolean includeLower;
private boolean includeUpper;
-
/**
* Constructs a query selecting all terms greater/equal than lowerTerm
* but less/equal than upperTerm.
@@ -71,47 +68,11 @@
* included in the range.
*/
public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
- this(field, lowerTerm, upperTerm, includeLower, includeUpper, null);
- }
-
- /** Constructs a query selecting all terms greater/equal than
- * lowerTerm but less/equal than upperTerm.
- * collator is not null, it will be used to decide whether
- * index terms are within the given range, rather than using the Unicode code
- * point order in which index terms are stored.
- * collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The Term text at the lower end of the range
- * @param upperTerm The Term text at the upper end of the range
- * @param includeLower
- * If true, the lowerTerm is
- * included in the range.
- * @param includeUpper
- * If true, the upperTerm is
- * included in the range.
- * @param collator The collator to use to collate index Terms, to determine
- * their membership in the range bounded by lowerTerm and
- * upperTerm.
- */
- public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
- Collator collator) {
super(field);
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
- this.collator = collator;
}
/** Returns the lower value of this range query */
@@ -125,23 +86,20 @@
/** Returns true if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return collator; }
@Override
protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
+ if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
return TermsEnum.EMPTY;
}
- if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) {
+ if ((lowerTerm == null || (includeLower && "".equals(lowerTerm))) && upperTerm == null) {
// NOTE: debateably, the caller should never pass in a
// multi reader...
final Terms terms = MultiFields.getTerms(reader, field);
return (terms != null) ? terms.iterator() : null;
}
return new TermRangeTermsEnum(reader, field,
- lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ lowerTerm, upperTerm, includeLower, includeUpper);
}
/** @deprecated */
@@ -171,7 +129,6 @@
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
- result = prime * result + ((collator == null) ? 0 : collator.hashCode());
result = prime * result + (includeLower ? 1231 : 1237);
result = prime * result + (includeUpper ? 1231 : 1237);
result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
@@ -188,11 +145,6 @@
if (getClass() != obj.getClass())
return false;
TermRangeQuery other = (TermRangeQuery) obj;
- if (collator == null) {
- if (other.collator != null)
- return false;
- } else if (!collator.equals(other.collator))
- return false;
if (includeLower != other.includeLower)
return false;
if (includeUpper != other.includeUpper)
@@ -209,5 +161,4 @@
return false;
return true;
}
-
}
Index: lucene/src/java/org/apache/lucene/search/CollatedTermRangeQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CollatedTermRangeQuery.java (revision 0)
+++ lucene/src/java/org/apache/lucene/search/CollatedTermRangeQuery.java (revision 0)
@@ -0,0 +1,175 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.ToStringUtils;
+
+/**
+ * A Query that matches documents within a range of terms.
+ *
+ * Collator is provided. It is not intended
+ * for numerical ranges; use {@link NumericRangeQuery} instead.
+ *
+ * lowerTerm but less/equal than upperTerm.
+ * lowerTerm is
+ * included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is
+ * included in the range.
+ * @param collator The collator to use to collate index Terms, to determine
+ * their membership in the range bounded by lowerTerm and
+ * upperTerm.
+ */
+ public CollatedTermRangeQuery(String field, String lowerTerm, String upperTerm,
+ boolean includeLower, boolean includeUpper, Collator collator) {
+ super(field);
+ // All arguments are stored exactly as given; no validation happens here.
+ // NOTE(review): getTermsEnum() calls collator.compare(...) without a null check when both
+ // endpoints are set -- presumably callers must pass a non-null collator; confirm.
+ this.lowerTerm = lowerTerm;
+ this.upperTerm = upperTerm;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.collator = collator;
+ }
+
+ /** Returns the lower term text of this range query; null means the lower end is open (printed as "*" by toString). */
+ public String getLowerTerm() { return lowerTerm; }
+
+ /** Returns the upper term text of this range query; null means the upper end is open (printed as "*" by toString). */
+ public String getUpperTerm() { return upperTerm; }
+
+ /** Returns true if the lower endpoint is inclusive, i.e. the includeLower flag given to the constructor. */
+ public boolean includesLower() { return includeLower; }
+
+ /** Returns true if the upper endpoint is inclusive, i.e. the includeUpper flag given to the constructor. */
+ public boolean includesUpper() { return includeUpper; }
+
+ /** Returns the collator given to the constructor, which is used to determine range inclusion. */
+ public Collator getCollator() { return collator; }
+
+ /**
+ * Returns the terms enumeration for this range.
+ * Yields {@link TermsEnum#EMPTY} when both endpoints are set and the collator orders the
+ * lower term after the upper term. When both endpoints are null, returns the iterator over
+ * all terms of the field, or null if MultiFields.getTerms finds no terms for it. Otherwise
+ * returns a CollatedTermRangeTermsEnum built from the endpoints, flags and collator.
+ */
+ @Override
+ protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ // Endpoint order is decided by the collator, not by Unicode code point order.
+ // NOTE(review): collator is dereferenced here without a null check, although
+ // equals()/hashCode() tolerate a null collator -- confirm null is never passed.
+ if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
+ return TermsEnum.EMPTY;
+ }
+ if (lowerTerm == null && upperTerm == null) {
+ // NOTE: debatably, the caller should never pass in a
+ // multi reader...
+ final Terms terms = MultiFields.getTerms(reader, field);
+ return (terms != null) ? terms.iterator() : null;
+ }
+ return new CollatedTermRangeTermsEnum(reader, field,
+ lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ }
+
+ /** @deprecated Use {@link #getField()} instead; this method only delegates to it. */
+ @Deprecated
+ public String field() {
+ return getField();
+ }
+
+ /**
+ * Prints a user-readable version of this query.
+ * The output is an optional "field:" prefix (only when this query's field differs from the
+ * given field), an opening '[' (inclusive) or '{' (exclusive), the lower term or "*" if
+ * null, " TO ", the upper term or "*" if null, a closing ']' or '}', and finally the boost
+ * text from ToStringUtils.boost. The collator is not part of the output.
+ */
+ @Override
+ public String toString(String field) {
+ StringBuilder buffer = new StringBuilder();
+ // Only prefix the field name when it is not the field the caller already implies.
+ if (!getField().equals(field)) {
+ buffer.append(getField());
+ buffer.append(":");
+ }
+ buffer.append(includeLower ? '[' : '{');
+ buffer.append(lowerTerm != null ? lowerTerm : "*");
+ buffer.append(" TO ");
+ buffer.append(upperTerm != null ? upperTerm : "*");
+ buffer.append(includeUpper ? ']' : '}');
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.toString();
+ }
+
+ /**
+ * Combines super.hashCode() with the collator, both inclusive flags and both endpoint
+ * terms, i.e. the same fields that equals() compares. Null fields contribute 0.
+ */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((collator == null) ? 0 : collator.hashCode());
+ // 1231 / 1237 are the values java.lang.Boolean#hashCode uses for true / false.
+ result = prime * result + (includeLower ? 1231 : 1237);
+ result = prime * result + (includeUpper ? 1231 : 1237);
+ result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
+ result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode());
+ return result;
+ }
+
+ /**
+ * Returns true if obj is this instance, or if super.equals accepts it, it has exactly the
+ * same class, and its collator, inclusive flags and both endpoint terms match this query's.
+ * A null collator or term only matches null on the other side.
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!super.equals(obj))
+ return false;
+ // Exact class match: a query of any other class is never equal to this one.
+ if (getClass() != obj.getClass())
+ return false;
+ CollatedTermRangeQuery other = (CollatedTermRangeQuery) obj;
+ // Collators are compared with Collator.equals, not by identity.
+ if (collator == null) {
+ if (other.collator != null)
+ return false;
+ } else if (!collator.equals(other.collator))
+ return false;
+ if (includeLower != other.includeLower)
+ return false;
+ if (includeUpper != other.includeUpper)
+ return false;
+ if (lowerTerm == null) {
+ if (other.lowerTerm != null)
+ return false;
+ } else if (!lowerTerm.equals(other.lowerTerm))
+ return false;
+ if (upperTerm == null) {
+ if (other.upperTerm != null)
+ return false;
+ } else if (!upperTerm.equals(other.upperTerm))
+ return false;
+ return true;
+ }
+}
Property changes on: lucene\src\java\org\apache\lucene\search\CollatedTermRangeQuery.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (revision 982481)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (working copy)
@@ -17,17 +17,20 @@
* limitations under the License.
*/
+import java.text.Collator;
+
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode.CompareOperator;
import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
+import org.apache.lucene.search.CollatedTermRangeQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TermRangeQuery;
/**
- * Builds a {@link TermRangeQuery} object from a {@link RangeQueryNode} object.
+ * Builds a {@link MultiTermQuery} object from a {@link RangeQueryNode} object.
*/
public class RangeQueryNodeBuilder implements StandardQueryBuilder {
@@ -35,7 +38,7 @@
// empty constructor
}
- public TermRangeQuery build(QueryNode queryNode) throws QueryNodeException {
+ public MultiTermQuery build(QueryNode queryNode) throws QueryNodeException {
RangeQueryNode rangeNode = (RangeQueryNode) queryNode;
ParametricQueryNode upper = rangeNode.getUpperBound();
ParametricQueryNode lower = rangeNode.getLowerBound();
@@ -53,9 +56,10 @@
String field = rangeNode.getField().toString();
- TermRangeQuery rangeQuery = new TermRangeQuery(field, lower
- .getTextAsString(), upper.getTextAsString(), lowerInclusive,
- upperInclusive, rangeNode.getCollator());
+ Collator collator = rangeNode.getCollator();
+ MultiTermQuery rangeQuery = collator == null ?
+ new TermRangeQuery(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive) :
+ new CollatedTermRangeQuery(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive, collator);
MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod)queryNode.getTag(MultiTermRewriteMethodAttribute.TAG_ID);
if (method != null) {
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (revision 982481)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import java.text.Collator;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -29,6 +30,7 @@
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.CollatedTermRangeQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -174,8 +176,10 @@
if (isPass2ResolvingPhrases) {
// Must use old-style RangeQuery in order to produce a BooleanQuery
// that can be turned into SpanOr clause
- TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive,
- getRangeCollator());
+ Collator collator = getRangeCollator();
+ MultiTermQuery rangeQuery = collator == null ?
+ new TermRangeQuery(field, part1, part2, inclusive, inclusive) :
+ new CollatedTermRangeQuery(field, part1, part2, inclusive, inclusive, collator);
rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return rangeQuery;
}
Index: lucene/MIGRATE.txt
===================================================================
--- lucene/MIGRATE.txt (revision 982481)
+++ lucene/MIGRATE.txt (working copy)
@@ -266,3 +266,7 @@
Likewise for DocsAndPositionsEnum.
+LUCENE-2514: The option to use a Collator's order (instead of Unicode order) for
+TermRangeQuery/Filter has been moved to CollatedTermRangeQuery/Filter.
+Note: this functionality does not scale well; if you are using it, consider
+indexing collation keys with the collation support in the analysis module instead.