Index: solr/src/java/org/apache/solr/schema/DateField.java
===================================================================
--- solr/src/java/org/apache/solr/schema/DateField.java (revision 986557)
+++ solr/src/java/org/apache/solr/schema/DateField.java (working copy)
@@ -407,7 +407,7 @@
/** DateField specific range query */
public Query getRangeQuery(QParser parser, SchemaField sf, Date part1, Date part2, boolean minInclusive, boolean maxInclusive) {
- return new TermRangeQuery(
+ return TermRangeQuery.newStringRange(
sf.getName(),
part1 == null ? null : toInternal(part1),
part2 == null ? null : toInternal(part2),
Index: solr/src/java/org/apache/solr/schema/FieldType.java
===================================================================
--- solr/src/java/org/apache/solr/schema/FieldType.java (revision 986557)
+++ solr/src/java/org/apache/solr/schema/FieldType.java (working copy)
@@ -529,7 +529,7 @@
*/
public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
// constant score mode is now enabled per default
- return new TermRangeQuery(
+ return TermRangeQuery.newStringRange(
field.getName(),
part1 == null ? null : toInternal(part1),
part2 == null ? null : toInternal(part2),
Index: solr/src/java/org/apache/solr/search/QueryParsing.java
===================================================================
--- solr/src/java/org/apache/solr/search/QueryParsing.java (revision 986557)
+++ solr/src/java/org/apache/solr/search/QueryParsing.java (working copy)
@@ -443,8 +443,8 @@
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesLower() ? '[' : '{');
- String lt = q.getLowerTerm();
- String ut = q.getUpperTerm();
+ String lt = q.getLowerTerm() == null ? null : q.getLowerTerm().utf8ToString(); // null bound = open-ended range, printed as '*' below
+ String ut = q.getUpperTerm() == null ? null : q.getUpperTerm().utf8ToString(); // guard first: utf8ToString() on a null bound would NPE
if (lt == null) {
out.append('*');
} else {
Index: solr/src/java/org/apache/solr/handler/component/ShardDoc.java
===================================================================
--- solr/src/java/org/apache/solr/handler/component/ShardDoc.java (revision 986557)
+++ solr/src/java/org/apache/solr/handler/component/ShardDoc.java (working copy)
@@ -94,15 +94,10 @@
String fieldname = fields[i].getField();
comparators[i] = getCachedComparator(fieldname, fields[i]
- .getType(), fields[i].getLocale(), fields[i].getComparatorSource());
+ .getType(), fields[i].getComparatorSource());
- if (fields[i].getType() == SortField.STRING) {
- this.fields[i] = new SortField(fieldname, fields[i].getLocale(),
+ this.fields[i] = new SortField(fieldname, fields[i].getType(),
fields[i].getReverse());
- } else {
- this.fields[i] = new SortField(fieldname, fields[i].getType(),
- fields[i].getReverse());
- }
//System.out.println("%%%%%%%%%%%%%%%%%% got "+fields[i].getType() +" for "+ fieldname +" fields[i].getReverse(): "+fields[i].getReverse());
}
@@ -144,17 +139,14 @@
return c < 0;
}
- Comparator getCachedComparator(String fieldname, int type, Locale locale, FieldComparatorSource factory) {
+ Comparator getCachedComparator(String fieldname, int type, FieldComparatorSource factory) {
Comparator comparator = null;
switch (type) {
case SortField.SCORE:
comparator = comparatorScore(fieldname);
break;
case SortField.STRING:
- if (locale != null)
- comparator = comparatorStringLocale(fieldname, locale);
- else
- comparator = comparatorNatural(fieldname);
+ comparator = comparatorNatural(fieldname);
break;
case SortField.CUSTOM:
if (factory instanceof MissingStringLastComparatorSource){
@@ -242,28 +234,6 @@
};
}
-
- Comparator comparatorStringLocale(final String fieldName,
- Locale locale) {
- final Collator collator = Collator.getInstance(locale);
- return new ShardComparator(fieldName) {
- public final int compare(final Object o1, final Object o2) {
- ShardDoc sd1 = (ShardDoc) o1;
- ShardDoc sd2 = (ShardDoc) o2;
- Comparable v1 = (Comparable)sortVal(sd1);
- Comparable v2 = (Comparable)sortVal(sd2);
- if (v1==v2)
- return 0;
- if (v1==null)
- return 1;
- if(v2==null)
- return -1;
- return -collator.compare(v1,v2);
- }
- };
- }
-
-
Comparator comparatorMissingStringLast(final String fieldName) {
return new ShardComparator(fieldName) {
public final int compare(final Object o1, final Object o2) {
Index: modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (revision 986557)
+++ modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (working copy)
@@ -89,11 +89,11 @@
// Collator (or an Arabic one for the case when Farsi searcher not
// supported).
ScoreDoc[] result = searcher.search
- (query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs;
+ (query, TermRangeFilter.newStringRange("content", firstBeg, firstEnd, true, true), 1).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
result = searcher.search
- (query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs;
+ (query, TermRangeFilter.newStringRange("content", secondBeg, secondEnd, true, true), 1).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
searcher.close();
@@ -117,11 +117,11 @@
writer.close();
IndexSearcher searcher = new IndexSearcher(ramDir, true);
- Query query = new TermRangeQuery("content", firstBeg, firstEnd, true, true);
+ Query query = TermRangeQuery.newStringRange("content", firstBeg, firstEnd, true, true);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, hits.length);
- query = new TermRangeQuery("content", secondBeg, secondEnd, true, true);
+ query = TermRangeQuery.newStringRange("content", secondBeg, secondEnd, true, true);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, hits.length);
searcher.close();
@@ -150,11 +150,11 @@
// with a Farsi Collator (or an Arabic one for the case when Farsi is
// not supported).
Query csrq
- = new TermRangeQuery("content", firstBeg, firstEnd, true, true);
+ = TermRangeQuery.newStringRange("content", firstBeg, firstEnd, true, true);
ScoreDoc[] result = search.search(csrq, null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
- csrq = new TermRangeQuery
+ csrq = TermRangeQuery.newStringRange
("content", secondBeg, secondEnd, true, true);
result = search.search(csrq, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
Index: lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 986557)
+++ lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
-import java.text.Collator;
import java.text.DateFormat;
import java.util.Arrays;
import java.util.Calendar;
@@ -568,51 +567,6 @@
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
}
-
- public void testFarsiRangeCollating() throws Exception {
- Random random = newRandom();
- MockRAMDirectory ramDir = newDirectory(random);
- IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
- Document doc = new Document();
- doc.add(new Field("content","\u0633\u0627\u0628",
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- iw.addDocument(doc);
- iw.close();
- IndexSearcher is = new IndexSearcher(ramDir, true);
-
- QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator c = Collator.getInstance(new Locale("ar"));
- qp.setRangeCollator(c);
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a ConstantScoreRangeQuery
- // with a Farsi Collator (or an Arabic one for the case when Farsi is not
- // supported).
-
- // Test ConstantScoreRangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
- ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- // Test TermRangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- is.close();
- ramDir.close();
- }
private String escapeDateString(String s) {
if (s.indexOf(" ") > -1) {
Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 986557)
+++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy)
@@ -32,12 +32,10 @@
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.io.Reader;
-import java.util.Locale;
import java.util.Random;
import java.util.Set;
import java.util.HashSet;
import java.util.Arrays;
-import java.text.Collator;
public class TestTermRangeQuery extends LuceneTestCase {
@@ -60,7 +58,7 @@
}
public void testExclusive() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C", false, false);
+ Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir, true);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@@ -81,7 +79,7 @@
}
public void testInclusive() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C", true, true);
+ Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -111,11 +109,11 @@
query = new TermRangeQuery("content", null, null, false, false);
assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
- query = new TermRangeQuery("content", "", null, true, false);
+ query = TermRangeQuery.newStringRange("content", "", null, true, false);
assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
// and now anothe one
- query = new TermRangeQuery("content", "B", null, true, false);
+ query = TermRangeQuery.newStringRange("content", "B", null, true, false);
assertTrue(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum);
assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
searcher.close();
@@ -127,7 +125,7 @@
initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"});
IndexSearcher searcher = new IndexSearcher(dir, true);
- TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true);
+ TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true);
checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J");
final int savedClauseCount = BooleanQuery.getMaxClauseCount();
@@ -156,10 +154,10 @@
}
public void testEqualsHashcode() {
- Query query = new TermRangeQuery("content", "A", "C", true, true);
+ Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true);
query.setBoost(1.0f);
- Query other = new TermRangeQuery("content", "A", "C", true, true);
+ Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
other.setBoost(1.0f);
assertEquals("query equals itself is true", query, query);
@@ -169,122 +167,34 @@
other.setBoost(2.0f);
assertFalse("Different boost queries are not equal", query.equals(other));
- other = new TermRangeQuery("notcontent", "A", "C", true, true);
+ other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true);
assertFalse("Different fields are not equal", query.equals(other));
- other = new TermRangeQuery("content", "X", "C", true, true);
+ other = TermRangeQuery.newStringRange("content", "X", "C", true, true);
assertFalse("Different lower terms are not equal", query.equals(other));
- other = new TermRangeQuery("content", "A", "Z", true, true);
+ other = TermRangeQuery.newStringRange("content", "A", "Z", true, true);
assertFalse("Different upper terms are not equal", query.equals(other));
- query = new TermRangeQuery("content", null, "C", true, true);
- other = new TermRangeQuery("content", null, "C", true, true);
+ query = TermRangeQuery.newStringRange("content", null, "C", true, true);
+ other = TermRangeQuery.newStringRange("content", null, "C", true, true);
assertEquals("equivalent queries with null lowerterms are equal()", query, other);
assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
- query = new TermRangeQuery("content", "C", null, true, true);
- other = new TermRangeQuery("content", "C", null, true, true);
+ query = TermRangeQuery.newStringRange("content", "C", null, true, true);
+ other = TermRangeQuery.newStringRange("content", "C", null, true, true);
assertEquals("equivalent queries with null upperterms are equal()", query, other);
assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
- query = new TermRangeQuery("content", null, "C", true, true);
- other = new TermRangeQuery("content", "C", null, true, true);
+ query = TermRangeQuery.newStringRange("content", null, "C", true, true);
+ other = TermRangeQuery.newStringRange("content", "C", null, true, true);
assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
- query = new TermRangeQuery("content", "A", "C", false, false);
- other = new TermRangeQuery("content", "A", "C", true, true);
+ query = TermRangeQuery.newStringRange("content", "A", "C", false, false);
+ other = TermRangeQuery.newStringRange("content", "A", "C", true, true);
assertFalse("queries with different inclusive are not equal", query.equals(other));
-
- query = new TermRangeQuery("content", "A", "C", false, false);
- other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance());
- assertFalse("a query with a collator is not equal to one without", query.equals(other));
}
- public void testExclusiveCollating() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
- initializeIndex(new String[] {"A", "B", "C", "D"});
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,C,D, only B in range", 1, hits.length);
- searcher.close();
-
- initializeIndex(new String[] {"A", "B", "D"});
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,D, only B in range", 1, hits.length);
- searcher.close();
-
- addDoc("C");
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("C added, still only B in range", 1, hits.length);
- searcher.close();
- }
-
- public void testInclusiveCollating() throws Exception {
- Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
-
- initializeIndex(new String[]{"A", "B", "C", "D"});
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
- searcher.close();
-
- initializeIndex(new String[]{"A", "B", "D"});
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("A,B,D - A and B in range", 2, hits.length);
- searcher.close();
-
- addDoc("C");
- searcher = new IndexSearcher(dir, true);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("C added - A, B, C in range", 3, hits.length);
- searcher.close();
- }
-
- public void testFarsi() throws Exception {
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator collator = Collator.getInstance(new Locale("ar"));
- Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a TermRangeQuery with a Farsi
- // Collator (or an Arabic one for the case when Farsi is not supported).
- initializeIndex(new String[]{ "\u0633\u0627\u0628"});
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, hits.length);
-
- query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, hits.length);
- searcher.close();
- }
-
- public void testDanish() throws Exception {
- Collator collator = Collator.getInstance(new Locale("da", "dk"));
- // Danish collation orders the words below in the given order (example taken
- // from TestSort.testInternationalSort() ).
- String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
- Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);
-
- // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
- // but Danish collation does.
- initializeIndex(words);
- IndexSearcher searcher = new IndexSearcher(dir, true);
- ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, hits.length);
-
- query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
- hits = searcher.search(query, null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, hits.length);
- searcher.close();
- }
-
private static class SingleCharAnalyzer extends Analyzer {
private static class SingleCharTokenizer extends Tokenizer {
@@ -369,7 +279,7 @@
public void testExclusiveLowerNull() throws Exception {
Analyzer analyzer = new SingleCharAnalyzer();
//http://issues.apache.org/jira/browse/LUCENE-38
- Query query = new TermRangeQuery("content", null, "C",
+ Query query = TermRangeQuery.newStringRange("content", null, "C",
false, false);
initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
IndexSearcher searcher = new IndexSearcher(dir, true);
@@ -402,7 +312,7 @@
public void testInclusiveLowerNull() throws Exception {
//http://issues.apache.org/jira/browse/LUCENE-38
Analyzer analyzer = new SingleCharAnalyzer();
- Query query = new TermRangeQuery("content", null, "C", true, true);
+ Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);
initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
IndexSearcher searcher = new IndexSearcher(dir, true);
int numHits = searcher.search(query, null, 1000).totalHits;
Index: lucene/src/test/org/apache/lucene/search/TestSort.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestSort.java (revision 986557)
+++ lucene/src/test/org/apache/lucene/search/TestSort.java (working copy)
@@ -23,7 +23,6 @@
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
-import java.util.Locale;
import java.util.Random;
import junit.framework.Test;
@@ -518,12 +517,6 @@
sort.setSort (new SortField ("string", SortField.STRING, true) );
assertMatches (full, queryF, sort, "IJZ");
- sort.setSort (new SortField ("i18n", Locale.ENGLISH));
- assertMatches (full, queryF, sort, "ZJI");
-
- sort.setSort (new SortField ("i18n", Locale.ENGLISH, true));
- assertMatches (full, queryF, sort, "IJZ");
-
sort.setSort (new SortField ("int", SortField.INT) );
assertMatches (full, queryF, sort, "IZJ");
@@ -595,51 +588,6 @@
assertMatches (full, queryX, sort, "GICEA");
}
- // test using a Locale for sorting strings
- public void testLocaleSort() throws Exception {
- sort.setSort (new SortField ("string", Locale.US) );
- assertMatches (full, queryX, sort, "AIGEC");
- assertMatches (full, queryY, sort, "DJHFB");
-
- sort.setSort (new SortField ("string", Locale.US, true) );
- assertMatches (full, queryX, sort, "CEGIA");
- assertMatches (full, queryY, sort, "BFHJD");
- }
-
- // test using various international locales with accented characters
- // (which sort differently depending on locale)
- public void testInternationalSort() throws Exception {
- sort.setSort (new SortField ("i18n", Locale.US));
- assertMatches (full, queryY, sort, "BFJDH");
-
- sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
- assertMatches (full, queryY, sort, "BJDFH");
-
- sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
- assertMatches (full, queryY, sort, "BJDHF");
-
- sort.setSort (new SortField ("i18n", Locale.US));
- assertMatches (full, queryX, sort, "ECAGI");
-
- sort.setSort (new SortField ("i18n", Locale.FRANCE));
- assertMatches (full, queryX, sort, "EACGI");
- }
-
- // Test the MultiSearcher's ability to preserve locale-sensitive ordering
- // by wrapping it around a single searcher
- public void testInternationalMultiSearcherSort() throws Exception {
- Searcher multiSearcher = new MultiSearcher (new Searchable[] { full });
-
- sort.setSort (new SortField ("i18n", new Locale("sv", "se")));
- assertMatches (multiSearcher, queryY, sort, "BJDFH");
-
- sort.setSort (new SortField ("i18n", Locale.US));
- assertMatches (multiSearcher, queryY, sort, "BFJDH");
-
- sort.setSort (new SortField ("i18n", new Locale("da", "dk")));
- assertMatches (multiSearcher, queryY, sort, "BJDHF");
- }
-
// test a variety of sorts using more than one searcher
public void testMultiSort() throws Exception {
MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY });
@@ -964,21 +912,7 @@
// up to this point, all of the searches should have "sane"
// FieldCache behavior, and should have reused hte cache in several cases
assertSaneFieldCaches(getName() + " various");
- // next we'll check Locale based (String[]) for 'string', so purge first
FieldCache.DEFAULT.purgeAllCaches();
-
- sort.setSort(new SortField ("string", Locale.US) );
- assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
- sort.setSort(new SortField ("string", Locale.US, true) );
- assertMatches(multi, queryA, sort, "CBEFGHIAJD");
-
- sort.setSort(new SortField ("string", Locale.UK) );
- assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
- assertSaneFieldCaches(getName() + " Locale.US + Locale.UK");
- FieldCache.DEFAULT.purgeAllCaches();
-
}
// make sure the documents returned by the search match the expected list
Index: lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 986557)
+++ lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy)
@@ -27,8 +27,6 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;
import java.io.IOException;
-import java.text.Collator;
-import java.util.Locale;
import junit.framework.Assert;
@@ -85,26 +83,18 @@
/** macro for readability */
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
- TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
+ TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
return query;
}
public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
- TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih);
+ TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
query.setRewriteMethod(method);
return query;
}
/** macro for readability */
- public static Query csrq(String f, String l, String h, boolean il,
- boolean ih, Collator c) {
- TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c);
- query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
- return query;
- }
-
- /** macro for readability */
public static Query cspq(Term prefix) {
PrefixQuery query = new PrefixQuery(prefix);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
@@ -133,14 +123,6 @@
"data", "pr*t?j")));
}
- public void testBasicsRngCollating() throws IOException {
- Collator c = Collator.getInstance(Locale.ENGLISH);
- QueryUtils.check(csrq("data", "1", "6", T, T, c));
- QueryUtils.check(csrq("data", "A", "Z", T, T, c));
- QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A",
- "Z", T, T, c));
- }
-
public void testEqualScores() throws IOException {
// NOTE: uses index build in *this* setUp
@@ -248,7 +230,7 @@
// first do a regular TermRangeQuery which uses term expansion so
// docs with more terms in range get higher scores
- Query rq = new TermRangeQuery("data", "1", "4", T, T);
+ Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T);
ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs;
int numHits = expected.length;
@@ -397,89 +379,6 @@
assertEquals("med,med,T,T", 1, result.length);
}
- public void testRangeQueryIdCollating() throws IOException {
- // NOTE: uses index build in *super* setUp
-
- IndexReader reader = signedIndexReader;
- IndexSearcher search = new IndexSearcher(reader);
-
- int medId = ((maxId - minId) / 2);
-
- String minIP = pad(minId);
- String maxIP = pad(maxId);
- String medIP = pad(medId);
-
- int numDocs = reader.numDocs();
-
- assertEquals("num of docs", numDocs, 1 + maxId - minId);
-
- ScoreDoc[] result;
-
- Collator c = Collator.getInstance(Locale.ENGLISH);
-
- // test id, bounded on both ends
-
- result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("find all", numDocs, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs;
- assertEquals("all but last", numDocs - 1, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("all but first", numDocs - 1, result.length);
-
- result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("all but ends", numDocs - 2, result.length);
-
- result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("med and up", 1 + maxId - medId, result.length);
-
- result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("up to med", 1 + medId - minId, result.length);
-
- // unbounded id
-
- result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("min and up", numDocs, result.length);
-
- result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("max and down", numDocs, result.length);
-
- result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not min, but up", numDocs - 1, result.length);
-
- result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not max, but down", numDocs - 1, result.length);
-
- result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs;
- assertEquals("med and up, not max", maxId - medId, result.length);
-
- result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("not min, up to med", medId - minId, result.length);
-
- // very small sets
-
- result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("min,min,F,F,c", 0, result.length);
- result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("med,med,F,F,c", 0, result.length);
- result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("max,max,F,F,c", 0, result.length);
-
- result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("min,min,T,T,c", 1, result.length);
- result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("nul,min,F,T,c", 1, result.length);
-
- result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("max,max,T,T,c", 1, result.length);
- result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("max,nul,T,T,c", 1, result.length);
-
- result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("med,med,T,T,c", 1, result.length);
- }
-
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
@@ -541,146 +440,4 @@
assertEquals("max,nul,T,T", 1, result.length);
}
-
- public void testRangeQueryRandCollating() throws IOException {
- // NOTE: uses index build in *super* setUp
-
- // using the unsigned index because collation seems to ignore hyphens
- IndexReader reader = unsignedIndexReader;
- IndexSearcher search = new IndexSearcher(reader);
-
- String minRP = pad(unsignedIndexDir.minR);
- String maxRP = pad(unsignedIndexDir.maxR);
-
- int numDocs = reader.numDocs();
-
- assertEquals("num of docs", numDocs, 1 + maxId - minId);
-
- ScoreDoc[] result;
-
- Collator c = Collator.getInstance(Locale.ENGLISH);
-
- // test extremes, bounded on both ends
-
- result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("find all", numDocs, result.length);
-
- result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs;
- assertEquals("all but biggest", numDocs - 1, result.length);
-
- result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("all but smallest", numDocs - 1, result.length);
-
- result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("all but extremes", numDocs - 2, result.length);
-
- // unbounded
-
- result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("smallest and up", numDocs, result.length);
-
- result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("biggest and down", numDocs, result.length);
-
- result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not smallest, but up", numDocs - 1, result.length);
-
- result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("not biggest, but down", numDocs - 1, result.length);
-
- // very small sets
-
- result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("min,min,F,F,c", 0, result.length);
- result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs;
- assertEquals("max,max,F,F,c", 0, result.length);
-
- result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("min,min,T,T,c", 1, result.length);
- result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs;
- assertEquals("nul,min,F,T,c", 1, result.length);
-
- result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs;
- assertEquals("max,max,T,T,c", 1, result.length);
- result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs;
- assertEquals("max,nul,T,T,c", 1, result.length);
- }
-
- public void testFarsi() throws Exception {
-
- /* build an index */
- MockRAMDirectory farsiIndex = newDirectory(rand);
- RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
- Document doc = new Document();
- doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- doc
- .add(new Field("body", "body", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- writer.addDocument(doc);
-
- IndexReader reader = writer.getReader();
- writer.close();
-
- IndexSearcher search = new IndexSearcher(reader);
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator c = Collator.getInstance(new Locale("ar"));
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a ConstantScoreRangeQuery
- // with a Farsi Collator (or an Arabic one for the case when Farsi is
- // not supported).
- ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T,
- c), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null,
- 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
- search.close();
- reader.close();
- farsiIndex.close();
- }
-
- public void testDanish() throws Exception {
-
- /* build an index */
- MockRAMDirectory danishIndex = newDirectory(rand);
- RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true));
-
- // Danish collation orders the words below in the given order
- // (example taken from TestSort.testInternationalSort() ).
- String[] words = { "H\u00D8T", "H\u00C5T", "MAND" };
- for (int docnum = 0 ; docnum < words.length ; ++docnum) {
- Document doc = new Document();
- doc.add(new Field("content", words[docnum],
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- doc.add(new Field("body", "body",
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- writer.addDocument(doc);
- }
- IndexReader reader = writer.getReader();
- writer.close();
-
- IndexSearcher search = new IndexSearcher(reader);
-
- Collator c = Collator.getInstance(new Locale("da", "dk"));
-
- // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
- // but Danish collation does.
- ScoreDoc[] result = search.search
- (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- result = search.search
- (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
- search.close();
- reader.close();
- danishIndex.close();
- }
}
Index: lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (revision 986557)
+++ lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (working copy)
@@ -66,7 +66,7 @@
if (lower>upper) {
int a=lower; lower=upper; upper=a;
}
- TermRangeQuery cq=new TermRangeQuery("asc", format.format(lower), format.format(upper), true, true);
+ TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true);
NumericRangeQuerynull. The collators
- * correspond to any SortFields which were given a specific locale.
- * @param fields Array of sort fields.
- * @return Array, possibly null.
- */
- private Collator[] hasCollators (final SortField[] fields) {
- if (fields == null) return null;
- Collator[] ret = new Collator[fields.length];
- for (int i=0; ib.
* @param a ScoreDoc
@@ -109,11 +82,9 @@
c = (s2 == null) ? 0 : -1;
} else if (s2 == null) {
c = 1;
- } else if (fields[i].getLocale() == null) {
- c = s1.compareTo(s2);
} else {
- c = collators[i].compare(s1.utf8ToString(), s2.utf8ToString());
- }
+ c = s1.compareTo(s2);
+ }
} else {
c = docA.fields[i].compareTo(docB.fields[i]);
if (type == SortField.SCORE) {
Index: lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 986557)
+++ lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy)
@@ -18,7 +18,6 @@
*/
import java.io.IOException;
-import java.text.Collator;
import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
@@ -32,10 +31,6 @@
* greater than all that precede it.
lowerTerm is included in the range.
* @param includeUpper
* If true, the upperTerm is included in the range.
- * @param collator
- * The collator to use to collate index Terms, to determine their
- * membership in the range bounded by lowerTerm and
- * upperTerm.
*
* @throws IOException
*/
- public TermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText,
- boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+ public TermRangeTermsEnum(IndexReader reader, String field, BytesRef lowerTerm, BytesRef upperTerm,
+ boolean includeLower, boolean includeUpper) throws IOException {
super(reader, field);
- this.collator = collator;
- this.upperTermText = upperTermText;
- this.lowerTermText = lowerTermText;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
// do a little bit of normalization...
// open ended range queries should always be inclusive.
- if (this.lowerTermText == null) {
- this.lowerTermText = "";
+ if (lowerTerm == null) {
+ this.lowerBytesRef = new BytesRef();
this.includeLower = true;
+ } else {
+ this.lowerBytesRef = lowerTerm;
}
- lowerBytesRef = new BytesRef(this.lowerTermText);
- if (this.upperTermText == null) {
+ if (upperTerm == null) {
this.includeUpper = true;
upperBytesRef = null;
} else {
- upperBytesRef = new BytesRef(upperTermText);
+ upperBytesRef = upperTerm;
}
- BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef("");
+ BytesRef startBytesRef = lowerBytesRef;
setInitialSeekTerm(startBytesRef);
termComp = getComparator();
}
@Override
protected AcceptStatus accept(BytesRef term) {
- if (collator == null) {
- if (!this.includeLower && term.equals(lowerBytesRef))
- return AcceptStatus.NO;
- // Use this field's default sort ordering
- if (upperBytesRef != null) {
- final int cmp = termComp.compare(upperBytesRef, term);
- /*
- * if beyond the upper term, or is exclusive and this is equal to
- * the upper term, break out
- */
- if ((cmp < 0) ||
- (!includeUpper && cmp==0)) {
- return AcceptStatus.END;
- }
- }
- return AcceptStatus.YES;
- } else {
- if ((includeLower
- ? collator.compare(term.utf8ToString(), lowerTermText) >= 0
- : collator.compare(term.utf8ToString(), lowerTermText) > 0)
- && (upperTermText == null
- || (includeUpper
- ? collator.compare(term.utf8ToString(), upperTermText) <= 0
- : collator.compare(term.utf8ToString(), upperTermText) < 0))) {
- return AcceptStatus.YES;
- }
+ if (!this.includeLower && term.equals(lowerBytesRef))
return AcceptStatus.NO;
+ // Use this field's default sort ordering
+ if (upperBytesRef != null) {
+ final int cmp = termComp.compare(upperBytesRef, term);
+ /*
+ * if beyond the upper term, or is exclusive and this is equal to
+ * the upper term, break out
+ */
+ if ((cmp < 0) ||
+ (!includeUpper && cmp==0)) {
+ return AcceptStatus.END;
+ }
}
+ return AcceptStatus.YES;
}
}
Index: lucene/src/java/org/apache/lucene/search/TermRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (revision 986557)
+++ lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (working copy)
@@ -1,5 +1,7 @@
package org.apache.lucene.search;
+import org.apache.lucene.util.BytesRef;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,15 +19,13 @@
* limitations under the License.
*/
-import java.text.Collator;
-
/**
* A Filter that restricts search results to a range of term
* values in a given field.
*
* This filter matches the documents looking for terms that fall into the
* supplied range according to {@link
- * String#compareTo(String)}, unless a Collator is provided. It is not intended
+ * BytesRef#compareTo(BytesRef)}. It is not intended
* for numerical ranges; use {@link NumericRangeFilter} instead.
*
*
If you construct a large number of range filters with different ranges but on the
@@ -44,39 +44,25 @@
* lowerTerm is null and includeLower is true (similar for upperTerm
* and includeUpper)
*/
- public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
+ public TermRangeFilter(String fieldName, BytesRef lowerTerm, BytesRef upperTerm,
boolean includeLower, boolean includeUpper) {
super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
}
/**
- * WARNING: Using this constructor and supplying a non-null
- * value in the collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The lower bound on this range
- * @param upperTerm The upper bound on this range
- * @param includeLower Does this range include the lower bound?
- * @param includeUpper Does this range include the upper bound?
- * @param collator The collator to use when determining range inclusion; set
- * to null to use Unicode code point ordering instead of collation.
- * @throws IllegalArgumentException if both terms are null or if
- * lowerTerm is null and includeLower is true (similar for upperTerm
- * and includeUpper)
+ * Factory that creates a new TermRangeFilter using Strings for term text.
*/
- public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
- boolean includeLower, boolean includeUpper,
- Collator collator) {
- super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
+ public static TermRangeFilter newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+ BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
+ BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
+ return new TermRangeFilter(field, lower, upper, includeLower, includeUpper);
}
-
+
/**
* Constructs a filter for field fieldName matching
* less than or equal to upperTerm.
*/
- public static TermRangeFilter Less(String fieldName, String upperTerm) {
+ public static TermRangeFilter Less(String fieldName, BytesRef upperTerm) {
return new TermRangeFilter(fieldName, null, upperTerm, false, true);
}
@@ -84,22 +70,19 @@
* Constructs a filter for field fieldName matching
* greater than or equal to lowerTerm.
*/
- public static TermRangeFilter More(String fieldName, String lowerTerm) {
+ public static TermRangeFilter More(String fieldName, BytesRef lowerTerm) {
return new TermRangeFilter(fieldName, lowerTerm, null, true, false);
}
/** Returns the lower value of this range filter */
- public String getLowerTerm() { return query.getLowerTerm(); }
+ public BytesRef getLowerTerm() { return query.getLowerTerm(); }
/** Returns the upper value of this range filter */
- public String getUpperTerm() { return query.getUpperTerm(); }
+ public BytesRef getUpperTerm() { return query.getUpperTerm(); }
/** Returns true if the lower endpoint is inclusive */
public boolean includesLower() { return query.includesLower(); }
/** Returns true if the upper endpoint is inclusive */
public boolean includesUpper() { return query.includesUpper(); }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return query.getCollator(); }
}
Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 986557)
+++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy)
@@ -18,12 +18,12 @@
*/
import java.io.IOException;
-import java.text.Collator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
/**
@@ -31,7 +31,7 @@
*
*
This query matches the documents looking for terms that fall into the
* supplied range according to {@link
- * String#compareTo(String)}, unless a Collator is provided. It is not intended
+ * BytesRef#compareTo(BytesRef)}. It is not intended
* for numerical ranges; use {@link NumericRangeQuery} instead.
*
*
This query uses the {@link
@@ -41,13 +41,11 @@
*/
public class TermRangeQuery extends MultiTermQuery {
- private String lowerTerm;
- private String upperTerm;
- private Collator collator;
+ private BytesRef lowerTerm;
+ private BytesRef upperTerm;
private boolean includeLower;
private boolean includeUpper;
-
/**
* Constructs a query selecting all terms greater/equal than lowerTerm
* but less/equal than upperTerm.
@@ -70,78 +68,48 @@
* If true, the upperTerm is
* included in the range.
*/
- public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
- this(field, lowerTerm, upperTerm, includeLower, includeUpper, null);
- }
-
- /** Constructs a query selecting all terms greater/equal than
- * lowerTerm but less/equal than upperTerm.
- *
- * If an endpoint is null, it is said - * to be "open". Either or both endpoints may be open. Open endpoints may not - * be exclusive (you can't select all but the first or last term without - * explicitly specifying the term to exclude.) - *
- * If collator is not null, it will be used to decide whether
- * index terms are within the given range, rather than using the Unicode code
- * point order in which index terms are stored.
- *
- * WARNING: Using this constructor and supplying a non-null
- * value in the collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The Term text at the lower end of the range
- * @param upperTerm The Term text at the upper end of the range
- * @param includeLower
- * If true, the lowerTerm is
- * included in the range.
- * @param includeUpper
- * If true, the upperTerm is
- * included in the range.
- * @param collator The collator to use to collate index Terms, to determine
- * their membership in the range bounded by lowerTerm and
- * upperTerm.
- */
- public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
- Collator collator) {
+ public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
super(field);
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
- this.collator = collator;
}
+
+ /**
+ * Factory that creates a new TermRangeQuery using Strings for term text.
+ */
+ public static TermRangeQuery newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+ BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
+ BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
+ return new TermRangeQuery(field, lower, upper, includeLower, includeUpper);
+ }
/** Returns the lower value of this range query */
- public String getLowerTerm() { return lowerTerm; }
+ public BytesRef getLowerTerm() { return lowerTerm; }
/** Returns the upper value of this range query */
- public String getUpperTerm() { return upperTerm; }
+ public BytesRef getUpperTerm() { return upperTerm; }
/** Returns true if the lower endpoint is inclusive */
public boolean includesLower() { return includeLower; }
/** Returns true if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return collator; }
@Override
protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
+ if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
return TermsEnum.EMPTY;
}
- if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) {
+ if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
// NOTE: for now, MultiTermQuery enums terms at the
// MultiReader level, so we must use MultiFields here:
final Terms terms = MultiFields.getTerms(reader, field);
return (terms != null) ? terms.iterator() : null;
}
return new TermRangeTermsEnum(reader, field,
- lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ lowerTerm, upperTerm, includeLower, includeUpper);
}
/** @deprecated */
@@ -159,9 +127,9 @@
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
- buffer.append(lowerTerm != null ? lowerTerm : "*");
+ buffer.append(lowerTerm != null ? lowerTerm.utf8ToString() : "*");
buffer.append(" TO ");
- buffer.append(upperTerm != null ? upperTerm : "*");
+ buffer.append(upperTerm != null ? upperTerm.utf8ToString() : "*");
buffer.append(includeUpper ? ']' : '}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
@@ -171,7 +139,6 @@
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
- result = prime * result + ((collator == null) ? 0 : collator.hashCode());
result = prime * result + (includeLower ? 1231 : 1237);
result = prime * result + (includeUpper ? 1231 : 1237);
result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
@@ -188,11 +155,6 @@
if (getClass() != obj.getClass())
return false;
TermRangeQuery other = (TermRangeQuery) obj;
- if (collator == null) {
- if (other.collator != null)
- return false;
- } else if (!collator.equals(other.collator))
- return false;
if (includeLower != other.includeLower)
return false;
if (includeUpper != other.includeUpper)
@@ -209,5 +171,4 @@
return false;
return true;
}
-
}
Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 986557)
+++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy)
@@ -618,83 +618,6 @@
}
}
- /** Sorts by a field's value using the Collator for a
- * given Locale.
- *
- *
WARNING: this is likely very slow; you'll
- * get much better performance using the
- * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */
- public static final class StringComparatorLocale extends FieldComparator {
-
- private final String[] values;
- private DocTerms currentDocTerms;
- private final String field;
- final Collator collator;
- private String bottom;
- private final BytesRef tempBR = new BytesRef();
-
- StringComparatorLocale(int numHits, String field, Locale locale) {
- values = new String[numHits];
- this.field = field;
- collator = Collator.getInstance(locale);
- }
-
- @Override
- public int compare(int slot1, int slot2) {
- final String val1 = values[slot1];
- final String val2 = values[slot2];
- if (val1 == null) {
- if (val2 == null) {
- return 0;
- }
- return -1;
- } else if (val2 == null) {
- return 1;
- }
- return collator.compare(val1, val2);
- }
-
- @Override
- public int compareBottom(int doc) {
- final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString();
- if (bottom == null) {
- if (val2 == null) {
- return 0;
- }
- return -1;
- } else if (val2 == null) {
- return 1;
- }
- return collator.compare(bottom, val2);
- }
-
- @Override
- public void copy(int slot, int doc) {
- final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
- if (br == null) {
- values[slot] = null;
- } else {
- values[slot] = br.utf8ToString();
- }
- }
-
- @Override
- public void setNextReader(IndexReader reader, int docBase) throws IOException {
- currentDocTerms = FieldCache.DEFAULT.getTerms(reader, field);
- }
-
- @Override
- public void setBottom(final int bottom) {
- this.bottom = values[bottom];
- }
-
- @Override
- public Comparable> value(int slot) {
- final String s = values[slot];
- return s == null ? null : new BytesRef(values[slot]);
- }
- }
-
/** Sorts by field's natural Term sort order, using
* ordinals. This is functionally equivalent to {@link
* TermValComparator}, but it first resolves the string
Index: lucene/src/java/org/apache/lucene/search/SortField.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SortField.java (revision 986557)
+++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Serializable;
-import java.util.Locale;
import org.apache.lucene.util.StringHelper;
@@ -91,7 +90,6 @@
private String field;
private int type; // defaults to determining type dynamically
- private Locale locale; // defaults to "natural order" (no Locale)
boolean reverse = false; // defaults to natural order
private FieldCache.Parser parser;
@@ -159,27 +157,6 @@
this.parser = parser;
}
- /** Creates a sort by terms in the given field sorted
- * according to the given locale.
- * @param field Name of field to sort by, cannot be null.
- * @param locale Locale of values in the field.
- */
- public SortField (String field, Locale locale) {
- initFieldType(field, STRING);
- this.locale = locale;
- }
-
- /** Creates a sort, possibly in reverse, by terms in the given field sorted
- * according to the given locale.
- * @param field Name of field to sort by, cannot be null.
- * @param locale Locale of values in the field.
- */
- public SortField (String field, Locale locale, boolean reverse) {
- initFieldType(field, STRING);
- this.locale = locale;
- this.reverse = reverse;
- }
-
/** Creates a sort with a custom comparison function.
* @param field Name of field to sort by; cannot be null.
* @param comparator Returns a comparator for sorting hits.
@@ -227,14 +204,6 @@
return type;
}
- /** Returns the Locale by which term values are interpreted.
- * May return null if no Locale was specified.
- * @return Locale, or null.
- */
- public Locale getLocale() {
- return locale;
- }
-
/** Returns the instance of a {@link FieldCache} parser that fits to the given sort type.
* May return null if no parser was specified. Sorting is using the default parser then.
* @return An instance of a {@link FieldCache} parser, or null.
@@ -310,7 +279,6 @@
break;
}
- if (locale != null) buffer.append('(').append(locale).append(')');
if (parser != null) buffer.append('(').append(parser).append(')');
if (reverse) buffer.append('!');
@@ -330,7 +298,6 @@
other.field == this.field // field is always interned
&& other.type == this.type
&& other.reverse == this.reverse
- && (other.locale == null ? this.locale == null : other.locale.equals(this.locale))
&& (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource))
&& (other.parser == null ? this.parser == null : other.parser.equals(this.parser))
);
@@ -345,7 +312,6 @@
public int hashCode() {
int hash=type^0x346565dd + Boolean.valueOf(reverse).hashCode()^0xaf5998bb;
if (field != null) hash += field.hashCode()^0xff5685dd;
- if (locale != null) hash += locale.hashCode()^0x08150815;
if (comparatorSource != null) hash += comparatorSource.hashCode();
if (parser != null) hash += parser.hashCode()^0x3aaf56ff;
return hash;
@@ -371,14 +337,6 @@
* @return {@link FieldComparator} to use when sorting
*/
public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException {
-
- if (locale != null) {
- // TODO: it'd be nice to allow FieldCache.getStringIndex
- // to optionally accept a Locale so sorting could then use
- // the faster StringComparator impls
- return new FieldComparator.StringComparatorLocale(numHits, field, locale);
- }
-
switch (type) {
case SortField.SCORE:
return new FieldComparator.RelevanceComparator(numHits);
Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
===================================================================
--- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 986557)
+++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
-import java.text.Collator;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.Date;
@@ -653,56 +652,6 @@
"gack (bar blar {a TO z})");
}
- public void testFarsiRangeCollating() throws Exception {
- Random random = newRandom();
- MockRAMDirectory ramDir = newDirectory(random);
- IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
- Document doc = new Document();
- doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- iw.addDocument(doc);
- iw.close();
- IndexSearcher is = new IndexSearcher(ramDir, true);
-
- StandardQueryParser qp = new StandardQueryParser();
- qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the
- // Farsi
- // characters properly.
- Collator c = Collator.getInstance(new Locale("ar"));
- qp.setRangeCollator(c);
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the
- // single
- // index Term below should NOT be returned by a ConstantScoreRangeQuery
- // with a Farsi Collator (or an Arabic one for the case when Farsi is
- // not
- // supported).
-
- // Test ConstantScoreRangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
- ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"),
- null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- // Test RangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- is.close();
- ramDir.close();
- }
-
/** for testing legacy DateField support */
private String getLegacyDate(String s) throws Exception {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
===================================================================
--- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (revision 986557)
+++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
-import java.text.Collator;
import java.text.DateFormat;
import java.util.Arrays;
import java.util.Calendar;
@@ -651,53 +650,6 @@
assertQueryEquals("gack ( bar blar { a TO z}) ", null,
"gack (bar blar {a TO z})");
}
-
- public void testFarsiRangeCollating() throws Exception {
-
- MockRAMDirectory ramDir = newDirectory(newRandom());
- IndexWriter iw = new IndexWriter(ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), true,
- IndexWriter.MaxFieldLength.LIMITED);
- Document doc = new Document();
- doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
- Field.Index.NOT_ANALYZED));
- iw.addDocument(doc);
- iw.close();
- IndexSearcher is = new IndexSearcher(ramDir, true);
-
- QueryParserWrapper qp = new QueryParserWrapper("content",
- new MockAnalyzer(MockTokenizer.WHITESPACE, false));
-
- // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
- // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
- // characters properly.
- Collator c = Collator.getInstance(new Locale("ar"));
- qp.setRangeCollator(c);
-
- // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
- // orders the U+0698 character before the U+0633 character, so the single
- // index Term below should NOT be returned by a ConstantScoreRangeQuery
- // with a Farsi Collator (or an Arabic one for the case when Farsi is not
- // supported).
-
- // Test ConstantScoreRangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
- ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- // Test RangeQuery
- qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should not be included.", 0, result.length);
-
- result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
- assertEquals("The index Term should be included.", 1, result.length);
-
- is.close();
- ramDir.close();
- }
private String escapeDateString(String s) {
if (s.contains(" ")) {
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java (revision 986557)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java (working copy)
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import java.text.Collator;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.Date;
@@ -37,7 +36,6 @@
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.standard.config.DateResolutionAttribute;
import org.apache.lucene.queryParser.standard.config.LocaleAttribute;
-import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute;
import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
/**
@@ -55,12 +53,7 @@
* If a {@link DateResolutionAttribute} is defined and the {@link Resolution} is
* not null it will also be used to parse the date value.
*
- * This processor will also try to retrieve a {@link RangeCollatorAttribute}
- * from the {@link QueryConfigHandler}. If a {@link RangeCollatorAttribute} is
- * found and the {@link Collator} is not null, it's set on the
- * {@link RangeQueryNode}.
*
- * @see RangeCollatorAttribute
* @see DateResolutionAttribute
* @see LocaleAttribute
* @see RangeQueryNode
@@ -80,17 +73,9 @@
ParametricQueryNode upper = parametricRangeNode.getUpperBound();
ParametricQueryNode lower = parametricRangeNode.getLowerBound();
Locale locale = Locale.getDefault();
- Collator collator = null;
DateTools.Resolution dateRes = null;
boolean inclusive = false;
- if (getQueryConfigHandler().hasAttribute(RangeCollatorAttribute.class)) {
-
- collator = getQueryConfigHandler().getAttribute(
- RangeCollatorAttribute.class).getRangeCollator();
-
- }
-
if (getQueryConfigHandler().hasAttribute(LocaleAttribute.class)) {
locale = getQueryConfigHandler().getAttribute(LocaleAttribute.class)
@@ -158,7 +143,7 @@
lower.setText(part1);
upper.setText(part2);
- return new RangeQueryNode(lower, upper, collator);
+ return new RangeQueryNode(lower, upper);
}
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (revision 986557)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (working copy)
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import java.text.Collator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@@ -45,7 +44,6 @@
import org.apache.lucene.queryParser.standard.config.LowercaseExpandedTermsAttribute;
import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute;
-import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute;
import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser;
import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;
@@ -318,20 +316,6 @@
}
- public Collator getRangeCollator() {
-
- if (this.config != null
- && this.config.hasAttribute(RangeCollatorAttribute.class)) {
-
- return this.config.getAttribute(RangeCollatorAttribute.class)
- .getRangeCollator();
-
- }
-
- return null;
-
- }
-
public boolean getUseOldRangeQuery() {
if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
return true;
@@ -421,10 +405,6 @@
this.qpHelper.setDefaultPhraseSlop(phraseSlop);
}
- public void setRangeCollator(Collator rc) {
- this.qpHelper.setRangeCollator(rc);
- }
-
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
if (useOldRangeQuery) {
setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java (revision 986557)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java (working copy)
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import java.text.Collator;
import java.util.Locale;
import java.util.Map;
import java.util.TooManyListenersException;
@@ -41,10 +40,8 @@
import org.apache.lucene.queryParser.standard.config.MultiFieldAttribute;
import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute;
-import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute;
import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
-import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser;
import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;
import org.apache.lucene.search.FuzzyQuery;
@@ -188,32 +185,6 @@
}
/**
- * Sets the collator used to determine index term inclusion in ranges for
- * RangeQuerys.
- *
capital of Hungary is equal to
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (revision 986557)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (working copy)
@@ -1,94 +0,0 @@
-package org.apache.lucene.queryParser.standard.config;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.text.Collator;
-
-import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
-import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor
- * and must be defined in the {@link QueryConfigHandler}. This attribute tells
- * the processor which {@link Collator} should be used for a
- * {@link TermRangeQuery} Term enumerations are always ordered by + * {@link #getComparator}. Each term in the enumeration is + * greater than all that precede it.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ *  This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum {
+  private Collator collator;
+  private String upperTermText;
+  private String lowerTermText;
+  private boolean includeLower;
+  private boolean includeUpper;
+
+  /**
+   * Enumerates all terms greater/equal than <code>lowerTerm</code>
+   * but less/equal than <code>upperTerm</code>.
+   *
+   * If an endpoint is null, it is said to be "open". Either or both
+   * endpoints may be open. Open endpoints may not be exclusive
+   * (you can't select all but the first or last term without
+   * explicitly specifying the term to exclude.)
+   *
+   * @param reader
+   *          The reader handed to the {@link FilteredTermsEnum} superclass.
+   * @param field
+   *          An interned field that holds both lower and upper terms.
+   * @param lowerTermText
+   *          The term text at the lower end of the range
+   * @param upperTermText
+   *          The term text at the upper end of the range
+   * @param includeLower
+   *          If true, the <code>lowerTerm</code> is included in the range.
+   * @param includeUpper
+   *          If true, the <code>upperTerm</code> is included in the range.
+   * @param collator
+   *          The collator to use to collate index Terms, to determine their
+   *          membership in the range bounded by <code>lowerTerm</code> and
+   *          <code>upperTerm</code>.
+   *
+   * @throws IOException
+   *           propagated from the calls made while setting up the enumeration
+   */
+  public SlowCollatedTermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText,
+    boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+    super(reader, field);
+    this.collator = collator;
+    this.upperTermText = upperTermText;
+    this.lowerTermText = lowerTermText;
+    this.includeLower = includeLower;
+    this.includeUpper = includeUpper;
+
+    // do a little bit of normalization...
+    // open ended range queries should always be inclusive.
+    // (only the lower endpoint is rewritten; a null upper endpoint is
+    // handled directly in accept())
+    if (this.lowerTermText == null) {
+      this.lowerTermText = "";
+      this.includeLower = true;
+    }
+
+    // TODO: optimize
+    // The initial seek term is always the empty term, whatever the lower
+    // endpoint is; range membership is decided per term in accept().
+    BytesRef startBytesRef = new BytesRef("");
+    setInitialSeekTerm(startBytesRef);
+  }
+
+  /**
+   * Accepts a term when the collator places it at or after the lower endpoint
+   * and at or before the upper endpoint, honoring the inclusive/exclusive
+   * flags. A null upper endpoint is treated as open.
+   *
+   * @param term the candidate term as UTF-8 bytes
+   * @return {@link AcceptStatus#YES} if the term is inside the range,
+   *         {@link AcceptStatus#NO} otherwise
+   */
+  @Override
+  protected AcceptStatus accept(BytesRef term) {
+    // Decode once: the same text may be compared against both endpoints.
+    final String termText = term.utf8ToString();
+    final int lowerCmp = collator.compare(termText, lowerTermText);
+    if (includeLower ? lowerCmp < 0 : lowerCmp <= 0) {
+      return AcceptStatus.NO;
+    }
+    if (upperTermText == null) {
+      return AcceptStatus.YES;
+    }
+    final int upperCmp = collator.compare(termText, upperTermText);
+    return (includeUpper ? upperCmp <= 0 : upperCmp < 0) ? AcceptStatus.YES : AcceptStatus.NO;
+  }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeTermsEnum.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0)
@@ -0,0 +1,70 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.Collator;
+
+/**
+ * A Filter that restricts search results to a range of term
+ * values in a given field.
+ *
+ * This filter matches the documents looking for terms that fall into the
+ * supplied range according to {@link
+ * String#compareTo(String)}, unless a Collator is provided. It is not intended
+ * for numerical ranges; use {@link NumericRangeFilter} instead.
+ *
+ *
+ * <p>If you construct a large number of range filters with different ranges but on the
+ * same field, {@link FieldCacheRangeFilter} may have significantly better performance.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ * This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter This query matches the documents looking for terms that fall into the
+ * supplied range according to {@link
+ * String#compareTo(String)}, unless a This query uses the {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * rewrite method.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ * This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public class SlowCollatedTermRangeQuery extends MultiTermQuery {
+ private String lowerTerm;
+ private String upperTerm;
+ private boolean includeLower;
+ private boolean includeUpper;
+ private Collator collator;
+
+ /** Constructs a query selecting all terms greater/equal than
+ *
+ * If an endpoint is null, it is said
+ * to be "open". Either or both endpoints may be open. Open endpoints may not
+ * be exclusive (you can't select all but the first or last term without
+ * explicitly specifying the term to exclude.)
+ *
+ *
+ * @param lowerTerm The Term text at the lower end of the range
+ * @param upperTerm The Term text at the upper end of the range
+ * @param includeLower
+ * If true, the WARNING: this is very slow; you'll
+ * get much better performance using the
+ * CollationKeyAnalyzer or ICUCollationKeyAnalyzer.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ * This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public final class SlowCollatedStringComparator extends FieldComparator {
+
+  /** Decoded value held in each hit slot; null marks a slot without a value. */
+  private final String[] values;
+  private DocTerms currentDocTerms;
+  private final String field;
+  final Collator collator;
+  private String bottom;
+  /** Scratch BytesRef handed to DocTerms.getTerm. */
+  private final BytesRef tempBR = new BytesRef();
+
+  SlowCollatedStringComparator(int numHits, String field, Collator collator) {
+    values = new String[numHits];
+    this.field = field;
+    this.collator = collator;
+  }
+
+  /** Collates two values; null sorts before any non-null value. */
+  private int compareNullFirst(String val1, String val2) {
+    if (val1 == null) {
+      return val2 == null ? 0 : -1;
+    }
+    return val2 == null ? 1 : collator.compare(val1, val2);
+  }
+
+  /** Decodes the term of doc in the current reader; null if getTerm returns null. */
+  private String termText(int doc) {
+    final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
+    return br == null ? null : br.utf8ToString();
+  }
+
+  /** Compares the values stored in two slots. */
+  @Override
+  public int compare(int slot1, int slot2) {
+    return compareNullFirst(values[slot1], values[slot2]);
+  }
+
+  /** Compares the current bottom value with the value of doc. */
+  @Override
+  public int compareBottom(int doc) {
+    // Goes through termText() so a null getTerm() result is handled the same
+    // way copy() handles it, instead of being dereferenced unchecked.
+    return compareNullFirst(bottom, termText(doc));
+  }
+
+  /** Stores the value of doc into slot. */
+  @Override
+  public void copy(int slot, int doc) {
+    values[slot] = termText(doc);
+  }
+
+  /** Switches to the terms of the given reader, obtained through the FieldCache. */
+  @Override
+  public void setNextReader(IndexReader reader, int docBase) throws IOException {
+    currentDocTerms = FieldCache.DEFAULT.getTerms(reader, field);
+  }
+
+  /** Remembers the value in the given slot as the bottom value. */
+  @Override
+  public void setBottom(final int bottom) {
+    this.bottom = values[bottom];
+  }
+
+  /** Returns the slot's value as a BytesRef, or null if the slot holds no value. */
+  @Override
+  public Comparable<?> value(int slot) {
+    final String s = values[slot];
+    return s == null ? null : new BytesRef(s);
+  }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedStringComparator.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/MIGRATE.txt
===================================================================
--- lucene/MIGRATE.txt (revision 986557)
+++ lucene/MIGRATE.txt (working copy)
@@ -266,3 +266,7 @@
Likewise for DocsAndPositionsEnum.
+LUCENE-2514: The option to use a Collator's order (instead of unicode order) for
+TermRangeQuery/Filter has been moved to SlowCollatedTermRangeQuery/Filter in contrib/queries.
+Note: this functionality isn't very scalable and if you are using it, consider
+indexing collation keys with the collation support in the analysis module instead.
+ /** Returns <code>true</code> if the lower endpoint is inclusive; the answer comes from the wrapped query. */
+ public boolean includesLower() { return query.includesLower(); }
+
+ /** Returns <code>true</code> if the upper endpoint is inclusive; the answer comes from the wrapped query. */
+ public boolean includesUpper() { return query.includesUpper(); }
+
+ /** Returns the collator used to determine range inclusion, if any; read from the wrapped query. */
+ public Collator getCollator() { return query.getCollator(); }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0)
@@ -0,0 +1,177 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.ToStringUtils;
+
+/**
+ * A Query that matches documents within a range of terms.
+ *
+ * Collator is provided. It is not intended
+ * for numerical ranges; use {@link NumericRangeQuery} instead.
+ *
+ * lowerTerm but less/equal than upperTerm.
+ * lowerTerm is
+ * included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is
+ * included in the range.
+ * @param collator The collator to use to collate index Terms, to determine
+ * their membership in the range bounded by lowerTerm and
+ * upperTerm.
+ */
+ public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm,
+ boolean includeLower, boolean includeUpper, Collator collator) {
+ super(field); // the field name is kept by the superclass; everything else is stored below as given
+ this.lowerTerm = lowerTerm; // may be null (open lower endpoint)
+ this.upperTerm = upperTerm; // may be null (open upper endpoint)
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.collator = collator; // not validated here; getTermsEnum calls collator.compare when both endpoints are set
+ }
+
+ /** Returns the lower value of this range query, exactly as passed to the constructor (null when that endpoint is open) */
+ public String getLowerTerm() { return lowerTerm; }
+
+ /** Returns the upper value of this range query, exactly as passed to the constructor (null when that endpoint is open) */
+ public String getUpperTerm() { return upperTerm; }
+
+ /** Returns <code>true</code> if the lower endpoint is inclusive */
+ public boolean includesLower() { return includeLower; }
+
+ /** Returns <code>true</code> if the upper endpoint is inclusive */
+ public boolean includesUpper() { return includeUpper; }
+
+ /** Returns the collator used to determine range inclusion (the instance passed to the constructor) */
+ public Collator getCollator() { return collator; }
+
+ @Override
+ protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+ if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
+ return TermsEnum.EMPTY; // lower endpoint collates after the upper one: return an empty enumeration
+ }
+ if (lowerTerm == null && upperTerm == null) { // both endpoints open: no filtering, hand back the field's own terms
+ // NOTE: debatably, the caller should never pass in a
+ // multi reader...
+ final Terms terms = MultiFields.getTerms(reader, field);
+ return (terms != null) ? terms.iterator() : null; // null when MultiFields.getTerms returned null for the field
+ }
+ return new SlowCollatedTermRangeTermsEnum(reader, field, // otherwise filter the terms through the collator
+ lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ }
+
+ /** @deprecated Use {@link #getField()} instead; this method only forwards to it. */
+ @Deprecated
+ public String field() {
+ return getField();
+ }
+
+  /** Renders the range in query syntax; the field prefix is omitted when it equals the given field. */
+  @Override
+  public String toString(String field) {
+    final StringBuilder sb = new StringBuilder();
+    final String ownField = getField();
+    if (!ownField.equals(field)) {
+      sb.append(ownField).append(":");
+    }
+    sb.append(includeLower ? '[' : '{')
+      .append(lowerTerm != null ? lowerTerm : "*")
+      .append(" TO ")
+      .append(upperTerm != null ? upperTerm : "*")
+      .append(includeUpper ? ']' : '}')
+      .append(ToStringUtils.boost(getBoost()));
+    return sb.toString();
+  }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode(); // start from the superclass hash, then mix in the fields that equals() compares
+ result = prime * result + ((collator == null) ? 0 : collator.hashCode());
+ result = prime * result + (includeLower ? 1231 : 1237); // 1231 / 1237: the values Boolean.hashCode() yields for true / false
+ result = prime * result + (includeUpper ? 1231 : 1237);
+ result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); // a null endpoint contributes 0
+ result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode());
+ return result;
+ }
+
+  /**
+   * Equal when the superclass check passes, the runtime classes match, and the
+   * collator, both inclusion flags and both endpoint strings are equal. A null
+   * collator or endpoint is only equal to another null.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (!super.equals(obj) || getClass() != obj.getClass()) {
+      return false;
+    }
+    final SlowCollatedTermRangeQuery that = (SlowCollatedTermRangeQuery) obj;
+    final boolean sameCollator =
+        collator == null ? that.collator == null : collator.equals(that.collator);
+    if (!sameCollator) {
+      return false;
+    }
+    if (includeLower != that.includeLower || includeUpper != that.includeUpper) {
+      return false;
+    }
+    final boolean sameLower =
+        lowerTerm == null ? that.lowerTerm == null : lowerTerm.equals(that.lowerTerm);
+    if (!sameLower) {
+      return false;
+    }
+    // Last field: its comparison is the overall result.
+    return upperTerm == null ? that.upperTerm == null : upperTerm.equals(that.upperTerm);
+  }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeQuery.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0)
@@ -0,0 +1,105 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldCache.DocTerms;
+import org.apache.lucene.util.BytesRef;
+
+/** Sorts by a field's value using the given Collator
+ *
+ *