Index: solr/src/java/org/apache/solr/schema/DateField.java =================================================================== --- solr/src/java/org/apache/solr/schema/DateField.java (revision 986557) +++ solr/src/java/org/apache/solr/schema/DateField.java (working copy) @@ -407,7 +407,7 @@ /** DateField specific range query */ public Query getRangeQuery(QParser parser, SchemaField sf, Date part1, Date part2, boolean minInclusive, boolean maxInclusive) { - return new TermRangeQuery( + return TermRangeQuery.newStringRange( sf.getName(), part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2), Index: solr/src/java/org/apache/solr/schema/FieldType.java =================================================================== --- solr/src/java/org/apache/solr/schema/FieldType.java (revision 986557) +++ solr/src/java/org/apache/solr/schema/FieldType.java (working copy) @@ -529,7 +529,7 @@ */ public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { // constant score mode is now enabled per default - return new TermRangeQuery( + return TermRangeQuery.newStringRange( field.getName(), part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2), Index: solr/src/java/org/apache/solr/search/QueryParsing.java =================================================================== --- solr/src/java/org/apache/solr/search/QueryParsing.java (revision 986557) +++ solr/src/java/org/apache/solr/search/QueryParsing.java (working copy) @@ -443,8 +443,8 @@ String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesLower() ? 
'[' : '{'); - String lt = q.getLowerTerm(); - String ut = q.getUpperTerm(); + String lt = q.getLowerTerm() == null ? null : q.getLowerTerm().utf8ToString(); + String ut = q.getUpperTerm() == null ? null : q.getUpperTerm().utf8ToString(); if (lt == null) { out.append('*'); } else { Index: solr/src/java/org/apache/solr/handler/component/ShardDoc.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/ShardDoc.java (revision 986557) +++ solr/src/java/org/apache/solr/handler/component/ShardDoc.java (working copy) @@ -94,15 +94,10 @@ String fieldname = fields[i].getField(); comparators[i] = getCachedComparator(fieldname, fields[i] - .getType(), fields[i].getLocale(), fields[i].getComparatorSource()); + .getType(), fields[i].getComparatorSource()); - if (fields[i].getType() == SortField.STRING) { - this.fields[i] = new SortField(fieldname, fields[i].getLocale(), + this.fields[i] = new SortField(fieldname, fields[i].getType(), fields[i].getReverse()); - } else { - this.fields[i] = new SortField(fieldname, fields[i].getType(), - fields[i].getReverse()); - } //System.out.println("%%%%%%%%%%%%%%%%%% got "+fields[i].getType() +" for "+ fieldname +" fields[i].getReverse(): "+fields[i].getReverse()); } @@ -144,17 +139,14 @@ return c < 0; } - Comparator getCachedComparator(String fieldname, int type, Locale locale, FieldComparatorSource factory) { + Comparator getCachedComparator(String fieldname, int type, FieldComparatorSource factory) { Comparator comparator = null; switch (type) { case SortField.SCORE: comparator = comparatorScore(fieldname); break; case SortField.STRING: - if (locale != null) - comparator = comparatorStringLocale(fieldname, locale); - else - comparator = comparatorNatural(fieldname); + comparator = comparatorNatural(fieldname); break; case SortField.CUSTOM: if (factory instanceof MissingStringLastComparatorSource){ @@ -242,28 +234,6 @@ }; } - - Comparator comparatorStringLocale(final String fieldName, - Locale locale) { - final Collator collator = 
Collator.getInstance(locale); - return new ShardComparator(fieldName) { - public final int compare(final Object o1, final Object o2) { - ShardDoc sd1 = (ShardDoc) o1; - ShardDoc sd2 = (ShardDoc) o2; - Comparable v1 = (Comparable)sortVal(sd1); - Comparable v2 = (Comparable)sortVal(sd2); - if (v1==v2) - return 0; - if (v1==null) - return 1; - if(v2==null) - return -1; - return -collator.compare(v1,v2); - } - }; - } - - Comparator comparatorMissingStringLast(final String fieldName) { return new ShardComparator(fieldName) { public final int compare(final Object o1, final Object o2) { Index: modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (revision 986557) +++ modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (working copy) @@ -89,11 +89,11 @@ // Collator (or an Arabic one for the case when Farsi searcher not // supported). 
ScoreDoc[] result = searcher.search - (query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs; + (query, TermRangeFilter.newStringRange("content", firstBeg, firstEnd, true, true), 1).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); result = searcher.search - (query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs; + (query, TermRangeFilter.newStringRange("content", secondBeg, secondEnd, true, true), 1).scoreDocs; assertEquals("The index Term should be included.", 1, result.length); searcher.close(); @@ -117,11 +117,11 @@ writer.close(); IndexSearcher searcher = new IndexSearcher(ramDir, true); - Query query = new TermRangeQuery("content", firstBeg, firstEnd, true, true); + Query query = TermRangeQuery.newStringRange("content", firstBeg, firstEnd, true, true); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, hits.length); - query = new TermRangeQuery("content", secondBeg, secondEnd, true, true); + query = TermRangeQuery.newStringRange("content", secondBeg, secondEnd, true, true); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, hits.length); searcher.close(); @@ -150,11 +150,11 @@ // with a Farsi Collator (or an Arabic one for the case when Farsi is // not supported). 
Query csrq - = new TermRangeQuery("content", firstBeg, firstEnd, true, true); + = TermRangeQuery.newStringRange("content", firstBeg, firstEnd, true, true); ScoreDoc[] result = search.search(csrq, null, 1000).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); - csrq = new TermRangeQuery + csrq = TermRangeQuery.newStringRange ("content", secondBeg, secondEnd, true, true); result = search.search(csrq, null, 1000).scoreDocs; assertEquals("The index Term should be included.", 1, result.length); Index: lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 986557) +++ lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.text.Collator; import java.text.DateFormat; import java.util.Arrays; import java.util.Calendar; @@ -568,51 +567,6 @@ assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); } - - public void testFarsiRangeCollating() throws Exception { - Random random = newRandom(); - MockRAMDirectory ramDir = newDirectory(random); - IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); - Document doc = new Document(); - doc.add(new Field("content","\u0633\u0627\u0628", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - iw.addDocument(doc); - iw.close(); - IndexSearcher is = new IndexSearcher(ramDir, true); - - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. 
However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator c = Collator.getInstance(new Locale("ar")); - qp.setRangeCollator(c); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is not - // supported). - - // Test ConstantScoreRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - // Test TermRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - is.close(); - ramDir.close(); - } private String escapeDateString(String s) { if (s.indexOf(" ") > -1) { Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -32,12 +32,10 @@ import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; import java.io.Reader; -import java.util.Locale; import java.util.Random; import java.util.Set; import java.util.HashSet; import java.util.Arrays; -import 
java.text.Collator; public class TestTermRangeQuery extends LuceneTestCase { @@ -60,7 +58,7 @@ } public void testExclusive() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", false, false); + Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -81,7 +79,7 @@ } public void testInclusive() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", true, true); + Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); @@ -111,11 +109,11 @@ query = new TermRangeQuery("content", null, null, false, false); assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); - query = new TermRangeQuery("content", "", null, true, false); + query = TermRangeQuery.newStringRange("content", "", null, true, false); assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); // and now anothe one - query = new TermRangeQuery("content", "B", null, true, false); + query = TermRangeQuery.newStringRange("content", "B", null, true, false); assertTrue(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length); searcher.close(); @@ -127,7 +125,7 @@ initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"}); IndexSearcher searcher = new IndexSearcher(dir, true); - TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true); + TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, 
true); checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J"); final int savedClauseCount = BooleanQuery.getMaxClauseCount(); @@ -156,10 +154,10 @@ } public void testEqualsHashcode() { - Query query = new TermRangeQuery("content", "A", "C", true, true); + Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true); query.setBoost(1.0f); - Query other = new TermRangeQuery("content", "A", "C", true, true); + Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true); other.setBoost(1.0f); assertEquals("query equals itself is true", query, query); @@ -169,122 +167,34 @@ other.setBoost(2.0f); assertFalse("Different boost queries are not equal", query.equals(other)); - other = new TermRangeQuery("notcontent", "A", "C", true, true); + other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true); assertFalse("Different fields are not equal", query.equals(other)); - other = new TermRangeQuery("content", "X", "C", true, true); + other = TermRangeQuery.newStringRange("content", "X", "C", true, true); assertFalse("Different lower terms are not equal", query.equals(other)); - other = new TermRangeQuery("content", "A", "Z", true, true); + other = TermRangeQuery.newStringRange("content", "A", "Z", true, true); assertFalse("Different upper terms are not equal", query.equals(other)); - query = new TermRangeQuery("content", null, "C", true, true); - other = new TermRangeQuery("content", null, "C", true, true); + query = TermRangeQuery.newStringRange("content", null, "C", true, true); + other = TermRangeQuery.newStringRange("content", null, "C", true, true); assertEquals("equivalent queries with null lowerterms are equal()", query, other); assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode()); - query = new TermRangeQuery("content", "C", null, true, true); - other = new TermRangeQuery("content", "C", null, true, true); + query = 
TermRangeQuery.newStringRange("content", "C", null, true, true); + other = TermRangeQuery.newStringRange("content", "C", null, true, true); assertEquals("equivalent queries with null upperterms are equal()", query, other); assertEquals("hashcode returns same value", query.hashCode(), other.hashCode()); - query = new TermRangeQuery("content", null, "C", true, true); - other = new TermRangeQuery("content", "C", null, true, true); + query = TermRangeQuery.newStringRange("content", null, "C", true, true); + other = TermRangeQuery.newStringRange("content", "C", null, true, true); assertFalse("queries with different upper and lower terms are not equal", query.equals(other)); - query = new TermRangeQuery("content", "A", "C", false, false); - other = new TermRangeQuery("content", "A", "C", true, true); + query = TermRangeQuery.newStringRange("content", "A", "C", false, false); + other = TermRangeQuery.newStringRange("content", "A", "C", true, true); assertFalse("queries with different inclusive are not equal", query.equals(other)); - - query = new TermRangeQuery("content", "A", "C", false, false); - other = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance()); - assertFalse("a query with a collator is not equal to one without", query.equals(other)); } - public void testExclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); - initializeIndex(new String[] {"A", "B", "C", "D"}); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,C,D, only B in range", 1, hits.length); - searcher.close(); - - initializeIndex(new String[] {"A", "B", "D"}); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,D, only B in range", 1, hits.length); - searcher.close(); - - addDoc("C"); - searcher = new IndexSearcher(dir, true); 
- hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("C added, still only B in range", 1, hits.length); - searcher.close(); - } - - public void testInclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); - - initializeIndex(new String[]{"A", "B", "C", "D"}); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,C,D - A,B,C in range", 3, hits.length); - searcher.close(); - - initializeIndex(new String[]{"A", "B", "D"}); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,D - A and B in range", 2, hits.length); - searcher.close(); - - addDoc("C"); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("C added - A, B, C in range", 3, hits.length); - searcher.close(); - } - - public void testFarsi() throws Exception { - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator collator = Collator.getInstance(new Locale("ar")); - Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator); - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a TermRangeQuery with a Farsi - // Collator (or an Arabic one for the case when Farsi is not supported). 
- initializeIndex(new String[]{ "\u0633\u0627\u0628"}); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, hits.length); - - query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, hits.length); - searcher.close(); - } - - public void testDanish() throws Exception { - Collator collator = Collator.getInstance(new Locale("da", "dk")); - // Danish collation orders the words below in the given order (example taken - // from TestSort.testInternationalSort() ). - String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; - Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); - - // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], - // but Danish collation does. - initializeIndex(words); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, hits.length); - - query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, hits.length); - searcher.close(); - } - private static class SingleCharAnalyzer extends Analyzer { private static class SingleCharTokenizer extends Tokenizer { @@ -369,7 +279,7 @@ public void testExclusiveLowerNull() throws Exception { Analyzer analyzer = new SingleCharAnalyzer(); //http://issues.apache.org/jira/browse/LUCENE-38 - Query query = new TermRangeQuery("content", null, "C", + Query query = TermRangeQuery.newStringRange("content", null, "C", false, false); initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer); IndexSearcher searcher = new IndexSearcher(dir, true); @@ 
-402,7 +312,7 @@ public void testInclusiveLowerNull() throws Exception { //http://issues.apache.org/jira/browse/LUCENE-38 Analyzer analyzer = new SingleCharAnalyzer(); - Query query = new TermRangeQuery("content", null, "C", true, true); + Query query = TermRangeQuery.newStringRange("content", null, "C", true, true); initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer); IndexSearcher searcher = new IndexSearcher(dir, true); int numHits = searcher.search(query, null, 1000).totalHits; Index: lucene/src/test/org/apache/lucene/search/TestSort.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSort.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -23,7 +23,6 @@ import java.util.BitSet; import java.util.HashMap; import java.util.Iterator; -import java.util.Locale; import java.util.Random; import junit.framework.Test; @@ -518,12 +517,6 @@ sort.setSort (new SortField ("string", SortField.STRING, true) ); assertMatches (full, queryF, sort, "IJZ"); - sort.setSort (new SortField ("i18n", Locale.ENGLISH)); - assertMatches (full, queryF, sort, "ZJI"); - - sort.setSort (new SortField ("i18n", Locale.ENGLISH, true)); - assertMatches (full, queryF, sort, "IJZ"); - sort.setSort (new SortField ("int", SortField.INT) ); assertMatches (full, queryF, sort, "IZJ"); @@ -595,51 +588,6 @@ assertMatches (full, queryX, sort, "GICEA"); } - // test using a Locale for sorting strings - public void testLocaleSort() throws Exception { - sort.setSort (new SortField ("string", Locale.US) ); - assertMatches (full, queryX, sort, "AIGEC"); - assertMatches (full, queryY, sort, "DJHFB"); - - sort.setSort (new SortField ("string", Locale.US, true) ); - assertMatches (full, queryX, sort, "CEGIA"); - assertMatches (full, queryY, sort, "BFHJD"); - } - - // test using various international locales with accented characters - // (which sort differently depending on locale) - 
public void testInternationalSort() throws Exception { - sort.setSort (new SortField ("i18n", Locale.US)); - assertMatches (full, queryY, sort, "BFJDH"); - - sort.setSort (new SortField ("i18n", new Locale("sv", "se"))); - assertMatches (full, queryY, sort, "BJDFH"); - - sort.setSort (new SortField ("i18n", new Locale("da", "dk"))); - assertMatches (full, queryY, sort, "BJDHF"); - - sort.setSort (new SortField ("i18n", Locale.US)); - assertMatches (full, queryX, sort, "ECAGI"); - - sort.setSort (new SortField ("i18n", Locale.FRANCE)); - assertMatches (full, queryX, sort, "EACGI"); - } - - // Test the MultiSearcher's ability to preserve locale-sensitive ordering - // by wrapping it around a single searcher - public void testInternationalMultiSearcherSort() throws Exception { - Searcher multiSearcher = new MultiSearcher (new Searchable[] { full }); - - sort.setSort (new SortField ("i18n", new Locale("sv", "se"))); - assertMatches (multiSearcher, queryY, sort, "BJDFH"); - - sort.setSort (new SortField ("i18n", Locale.US)); - assertMatches (multiSearcher, queryY, sort, "BFJDH"); - - sort.setSort (new SortField ("i18n", new Locale("da", "dk"))); - assertMatches (multiSearcher, queryY, sort, "BJDHF"); - } - // test a variety of sorts using more than one searcher public void testMultiSort() throws Exception { MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY }); @@ -964,21 +912,7 @@ // up to this point, all of the searches should have "sane" // FieldCache behavior, and should have reused hte cache in several cases assertSaneFieldCaches(getName() + " various"); - // next we'll check Locale based (String[]) for 'string', so purge first FieldCache.DEFAULT.purgeAllCaches(); - - sort.setSort(new SortField ("string", Locale.US) ); - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - sort.setSort(new SortField ("string", Locale.US, true) ); - assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - - sort.setSort(new SortField ("string", Locale.UK) ); 
- assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - assertSaneFieldCaches(getName() + " Locale.US + Locale.UK"); - FieldCache.DEFAULT.purgeAllCaches(); - } // make sure the documents returned by the search match the expected list Index: lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -27,8 +27,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockRAMDirectory; import java.io.IOException; -import java.text.Collator; -import java.util.Locale; import junit.framework.Assert; @@ -85,26 +83,18 @@ /** macro for readability */ public static Query csrq(String f, String l, String h, boolean il, boolean ih) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih); query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih); query.setRewriteMethod(method); return query; } /** macro for readability */ - public static Query csrq(String f, String l, String h, boolean il, - boolean ih, Collator c) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); - query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - return query; - } - - /** macro for readability */ public static Query cspq(Term prefix) { PrefixQuery query = new PrefixQuery(prefix); query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); @@ -133,14 +123,6 @@ "data", "pr*t?j"))); } - public void testBasicsRngCollating() 
throws IOException { - Collator c = Collator.getInstance(Locale.ENGLISH); - QueryUtils.check(csrq("data", "1", "6", T, T, c)); - QueryUtils.check(csrq("data", "A", "Z", T, T, c)); - QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A", - "Z", T, T, c)); - } - public void testEqualScores() throws IOException { // NOTE: uses index build in *this* setUp @@ -248,7 +230,7 @@ // first do a regular TermRangeQuery which uses term expansion so // docs with more terms in range get higher scores - Query rq = new TermRangeQuery("data", "1", "4", T, T); + Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T); ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; int numHits = expected.length; @@ -397,89 +379,6 @@ assertEquals("med,med,T,T", 1, result.length); } - public void testRangeQueryIdCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); - - int medId = ((maxId - minId) / 2); - - String minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test id, bounded on both ends - - result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs; - assertEquals("all but last", numDocs - 1, result.length); - - result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("all but first", numDocs - 1, result.length); - - result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("all but ends", numDocs - 2, result.length); - - result = search.search(csrq("id", medIP, maxIP, T, T, c), null, 
numDocs).scoreDocs; - assertEquals("med and up", 1 + maxId - medId, result.length); - - result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("up to med", 1 + medId - minId, result.length); - - // unbounded id - - result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("min and up", numDocs, result.length); - - result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("max and down", numDocs, result.length); - - result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs; - assertEquals("not min, but up", numDocs - 1, result.length); - - result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("not max, but down", numDocs - 1, result.length); - - result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs; - assertEquals("med and up, not max", maxId - medId, result.length); - - result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("not min, up to med", medId - minId, result.length); - - // very small sets - - result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("med,med,F,F,c", 0, result.length); - result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("max,max,T,T,c", 1, 
result.length); - result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - - result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("med,med,T,T,c", 1, result.length); - } - public void testRangeQueryRand() throws IOException { // NOTE: uses index build in *super* setUp @@ -541,146 +440,4 @@ assertEquals("max,nul,T,T", 1, result.length); } - - public void testRangeQueryRandCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - // using the unsigned index because collation seems to ignore hyphens - IndexReader reader = unsignedIndexReader; - IndexSearcher search = new IndexSearcher(reader); - - String minRP = pad(unsignedIndexDir.minR); - String maxRP = pad(unsignedIndexDir.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test extremes, bounded on both ends - - result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs; - assertEquals("all but biggest", numDocs - 1, result.length); - - result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs; - assertEquals("all but smallest", numDocs - 1, result.length); - - result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("all but extremes", numDocs - 2, result.length); - - // unbounded - - result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("smallest and up", numDocs, result.length); - - result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs; - assertEquals("biggest and down", numDocs, result.length); - - result = 
search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs; - assertEquals("not smallest, but up", numDocs - 1, result.length); - - result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("not biggest, but down", numDocs - 1, result.length); - - // very small sets - - result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs; - assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - } - - public void testFarsi() throws Exception { - - /* build an index */ - MockRAMDirectory farsiIndex = newDirectory(rand); - RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true)); - Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc - .add(new Field("body", "body", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = new IndexSearcher(reader); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. 
- Collator c = Collator.getInstance(new Locale("ar")); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not supported). - ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T, - c), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, - 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - search.close(); - reader.close(); - farsiIndex.close(); - } - - public void testDanish() throws Exception { - - /* build an index */ - MockRAMDirectory danishIndex = newDirectory(rand); - RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true)); - - // Danish collation orders the words below in the given order - // (example taken from TestSort.testInternationalSort() ). - String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; - for (int docnum = 0 ; docnum < words.length ; ++docnum) { - Document doc = new Document(); - doc.add(new Field("content", words[docnum], - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - } - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = new IndexSearcher(reader); - - Collator c = Collator.getInstance(new Locale("da", "dk")); - - // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], - // but Danish collation does. 
- ScoreDoc[] result = search.search - (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - result = search.search - (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - search.close(); - reader.close(); - danishIndex.close(); - } } Index: lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (working copy) @@ -66,7 +66,7 @@ if (lower>upper) { int a=lower; lower=upper; upper=a; } - TermRangeQuery cq=new TermRangeQuery("asc", format.format(lower), format.format(upper), true, true); + TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true); NumericRangeQuery tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true); TopDocs trTopDocs = searcher.search(cq, 1); TopDocs nrTopDocs = searcher.search(tq, 1); Index: lucene/src/test/org/apache/lucene/search/TestDateFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestDateFilter.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestDateFilter.java (working copy) @@ -66,12 +66,12 @@ // filter that should preserve matches // DateFilter df1 = DateFilter.Before("datefield", now); - TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools .timeToString(now, DateTools.Resolution.MILLISECOND), false, true); // filter that should discard matches // DateFilter df2 = 
DateFilter.Before("datefield", now - 999999); - TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(0, DateTools.Resolution.MILLISECOND), DateTools .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false); @@ -133,13 +133,13 @@ // filter that should preserve matches // DateFilter df1 = DateFilter.After("datefield", now); - TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(now, DateTools.Resolution.MILLISECOND), DateTools .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), true, false); // filter that should discard matches // DateFilter df2 = DateFilter.After("datefield", now + 999999); - TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), DateTools.timeToString(now + 999999999, DateTools.Resolution.MILLISECOND), false, true); Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -343,12 +343,10 @@ final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT); NumericUtils.intToPrefixCoded(lower, 0, lowerBytes); NumericUtils.intToPrefixCoded(upper, 0, upperBytes); - // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string! 
- final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString(); // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); - TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true); + TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -356,7 +354,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false); - cq=new TermRangeQuery(field, lowerString, upperString, false, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -364,7 +362,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true); - cq=new TermRangeQuery(field, lowerString, upperString, false, true); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -372,7 +370,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false); - cq=new TermRangeQuery(field, lowerString, upperString, true, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = 
searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -362,12 +362,10 @@ final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); NumericUtils.longToPrefixCoded(lower, 0, lowerBytes); NumericUtils.longToPrefixCoded(upper, 0, upperBytes); - // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string! - final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString(); // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); - TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true); + TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -375,7 +373,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false); - cq=new TermRangeQuery(field, lowerString, upperString, false, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -383,7 +381,7 @@ termCountC += 
cq.getTotalNumberOfTerms(); // test left exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true); - cq=new TermRangeQuery(field, lowerString, upperString, false, true); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -391,7 +389,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false); - cq=new TermRangeQuery(field, lowerString, upperString, true, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (working copy) @@ -186,7 +186,7 @@ * This tests FilteredQuery's rewrite correctness */ public void testRangeQuery() throws Exception { - TermRangeQuery rq = new TermRangeQuery( + TermRangeQuery rq = TermRangeQuery.newStringRange( "sorter", "b", "d", true, true); Query filteredquery = new FilteredQuery(rq, filter); Index: lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (revision 986557) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (working copy) @@ -59,189 +59,88 @@ // test id, bounded on both ends - result = search.search(q, 
new TermRangeFilter("id", minIP, maxIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, T), numDocs).scoreDocs; assertEquals("find all", numDocs, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, F), numDocs).scoreDocs; assertEquals("all but last", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, T), numDocs).scoreDocs; assertEquals("all but first", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, F), numDocs).scoreDocs; assertEquals("all but ends", numDocs - 2, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, T), numDocs).scoreDocs; assertEquals("med and up", 1 + maxId - medId, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, T, T), numDocs).scoreDocs; assertEquals("up to med", 1 + medId - minId, result.length); // unbounded id - result = search.search(q, new TermRangeFilter("id", minIP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, T, F), numDocs).scoreDocs; assertEquals("min and up", numDocs, result.length); - result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, T), numDocs).scoreDocs; assertEquals("max and down", numDocs, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, null, F, F), + result = search.search(q, 
TermRangeFilter.newStringRange("id", minIP, null, F, F), numDocs).scoreDocs; assertEquals("not min, but up", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, F), numDocs).scoreDocs; assertEquals("not max, but down", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, F), numDocs).scoreDocs; assertEquals("med and up, not max", maxId - medId, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, F, T), numDocs).scoreDocs; assertEquals("not min, up to med", medId - minId, result.length); // very small sets - result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, F, F), numDocs).scoreDocs; assertEquals("min,min,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, F, F), numDocs).scoreDocs; assertEquals("med,med,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, F, F), numDocs).scoreDocs; assertEquals("max,max,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, T, T), numDocs).scoreDocs; assertEquals("min,min,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", null, minIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", null, minIP, F, T), numDocs).scoreDocs; 
assertEquals("nul,min,F,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, T, T), numDocs).scoreDocs; assertEquals("max,max,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, null, T, F), numDocs).scoreDocs; assertEquals("max,nul,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, T, T), numDocs).scoreDocs; assertEquals("med,med,T,T", 1, result.length); } - public void testRangeFilterIdCollating() throws IOException { - - IndexReader reader = signedIndexReader; - IndexSearcher search = new IndexSearcher(reader); - - Collator c = Collator.getInstance(Locale.ENGLISH); - - int medId = ((maxId - minId) / 2); - - String minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - Query q = new TermQuery(new Term("body", "body")); - - // test id, bounded on both ends - int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, - T, c), 1000).totalHits; - assertEquals("find all", numDocs, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits; - assertEquals("all but last", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; - assertEquals("all but first", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; - assertEquals("all but ends", numDocs - 2, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; - assertEquals("med and 
up", 1 + maxId - medId, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; - assertEquals("up to med", 1 + medId - minId, numHits); - - // unbounded id - - numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c), - 1000).totalHits; - assertEquals("min and up", numDocs, numHits); - - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, c), - 1000).totalHits; - assertEquals("max and down", numDocs, numHits); - - numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c), - 1000).totalHits; - assertEquals("not min, but up", numDocs - 1, numHits); - - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c), - 1000).totalHits; - assertEquals("not max, but down", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits; - assertEquals("med and up, not max", maxId - medId, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; - assertEquals("not min, up to med", medId - minId, numHits); - - // very small sets - - numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; - assertEquals("min,min,F,F", 0, numHits); - numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; - assertEquals("med,med,F,F", 0, numHits); - numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; - assertEquals("max,max,F,F", 0, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; - assertEquals("min,min,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c), - 1000).totalHits; - assertEquals("nul,min,F,T", 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; - 
assertEquals("max,max,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c), - 1000).totalHits; - assertEquals("max,nul,T,T", 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits; - assertEquals("med,med,T,T", 1, numHits); - } - public void testRangeFilterRand() throws IOException { IndexReader reader = signedIndexReader; @@ -259,217 +158,62 @@ // test extremes, bounded on both ends - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, T), numDocs).scoreDocs; assertEquals("find all", numDocs, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, F), numDocs).scoreDocs; assertEquals("all but biggest", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, T), numDocs).scoreDocs; assertEquals("all but smallest", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, F), numDocs).scoreDocs; assertEquals("all but extremes", numDocs - 2, result.length); // unbounded - result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, T, F), numDocs).scoreDocs; assertEquals("smallest and up", numDocs, result.length); - result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, T), numDocs).scoreDocs; assertEquals("biggest and down", numDocs, result.length); - result = search.search(q, new 
TermRangeFilter("rand", minRP, null, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, F, F), numDocs).scoreDocs; assertEquals("not smallest, but up", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, F), numDocs).scoreDocs; assertEquals("not biggest, but down", numDocs - 1, result.length); // very small sets - result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, F, F), numDocs).scoreDocs; assertEquals("min,min,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, F, F), numDocs).scoreDocs; assertEquals("max,max,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, T, T), numDocs).scoreDocs; assertEquals("min,min,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", null, minRP, F, T), numDocs).scoreDocs; assertEquals("nul,min,F,T", 1, result.length); - result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, T, T), numDocs).scoreDocs; assertEquals("max,max,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, null, T, F), numDocs).scoreDocs; assertEquals("max,nul,T,T", 1, result.length); } - - public void testRangeFilterRandCollating() throws IOException { - - // using the unsigned index because collation seems 
to ignore hyphens - IndexReader reader = unsignedIndexReader; - IndexSearcher search = new IndexSearcher(reader); - - Collator c = Collator.getInstance(Locale.ENGLISH); - - String minRP = pad(unsignedIndexDir.minR); - String maxRP = pad(unsignedIndexDir.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - Query q = new TermQuery(new Term("body", "body")); - - // test extremes, bounded on both ends - - int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, - T, c), 1000).totalHits; - assertEquals("find all", numDocs, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, - c), 1000).totalHits; - assertEquals("all but biggest", numDocs - 1, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, - c), 1000).totalHits; - assertEquals("all but smallest", numDocs - 1, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, - c), 1000).totalHits; - assertEquals("all but extremes", numDocs - 2, numHits); - - // unbounded - - numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; - assertEquals("smallest and up", numDocs, numHits); - - numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; - assertEquals("biggest and down", numDocs, numHits); - - numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; - assertEquals("not smallest, but up", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits; - assertEquals("not biggest, but down", numDocs - 1, numHits); - - // very small sets - - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F, - c), 1000).totalHits; - assertEquals("min,min,F,F", 0, numHits); - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, - c), 
1000).totalHits; - assertEquals("max,max,F,F", 0, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T, - c), 1000).totalHits; - assertEquals("min,min,T,T", 1, numHits); - numHits = search.search(q, - new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; - assertEquals("nul,min,F,T", 1, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T, - c), 1000).totalHits; - assertEquals("max,max,T,T", 1, numHits); - numHits = search.search(q, - new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; - assertEquals("max,nul,T,T", 1, numHits); - } - - public void testFarsi() throws Exception { - - /* build an index */ - MockRAMDirectory farsiIndex = newDirectory(rand); - RandomIndexWriter writer = new RandomIndexWriter(rand, farsiIndex); - Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc - .add(new Field("body", "body", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = new IndexSearcher(reader); - Query q = new TermQuery(new Term("body", "body")); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator collator = Collator.getInstance(new Locale("ar")); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a TermRangeFilter with a Farsi - // Collator (or an Arabic one for the case when Farsi is not supported). 
- int numHits = search.search(q, new TermRangeFilter("content", "\u062F", - "\u0698", T, T, collator), 1000).totalHits; - assertEquals("The index Term should not be included.", 0, numHits); - - numHits = search.search(q, new TermRangeFilter("content", "\u0633", - "\u0638", T, T, collator), 1000).totalHits; - assertEquals("The index Term should be included.", 1, numHits); - search.close(); - reader.close(); - farsiIndex.close(); - } - - public void testDanish() throws Exception { - - /* build an index */ - MockRAMDirectory danishIndex = newDirectory(rand); - RandomIndexWriter writer = new RandomIndexWriter(rand, danishIndex); - // Danish collation orders the words below in the given order - // (example taken from TestSort.testInternationalSort() ). - String[] words = {"H\u00D8T", "H\u00C5T", "MAND"}; - for (int docnum = 0; docnum < words.length; ++docnum) { - Document doc = new Document(); - doc.add(new Field("content", words[docnum], Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc.add(new Field("body", "body", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - } - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = new IndexSearcher(reader); - Query q = new TermQuery(new Term("body", "body")); - - Collator collator = Collator.getInstance(new Locale("da", "dk")); - - // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], - // but Danish collation does. 
- int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T", - "MAND", F, F, collator), 1000).totalHits; - assertEquals("The index Term should be included.", 1, numHits); - - numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T", - "MAND", F, F, collator), 1000).totalHits; - assertEquals("The index Term should not be included.", 0, numHits); - search.close(); - reader.close(); - danishIndex.close(); - } } Index: lucene/src/java/org/apache/lucene/queryParser/QueryParser.java =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (revision 986557) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (working copy) @@ -150,9 +150,9 @@ // maps field names to date resolutions Map fieldToDateResolution = null; - // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys. - Collator rangeCollator = null; + // Whether or not to analyze range terms when constructing RangeQuerys + // (For example, analyzing terms into collation keys for locale-sensitive RangeQuery) + boolean analyzeRangeTerms = false; /** @deprecated remove when getFieldQuery is removed */ private static final VirtualMethod getFieldQueryMethod = @@ -478,27 +478,21 @@ } /** - * Sets the collator used to determine index term inclusion in ranges - * for RangeQuerys. - *

- * WARNING: Setting the rangeCollator to a non-null - * collator using this method will cause every single index Term in the - * Field referenced by lowerTerm and/or upperTerm to be examined. - * Depending on the number of index Terms in this Field, the operation could - * be very slow. + * Set whether or not to analyze range terms when constructing RangeQuerys. + * For example, setting this to true can enable analyzing terms into + * collation keys for locale-sensitive RangeQuery. * - * @param rc the collator to use when constructing RangeQuerys + * @param analyzeRangeTerms whether or not terms should be analyzed for RangeQuerys */ - public void setRangeCollator(Collator rc) { - rangeCollator = rc; + public void setAnalyzeRangeTerms(boolean analyzeRangeTerms) { + this.analyzeRangeTerms = analyzeRangeTerms; } /** - * @return the collator used to determine index term inclusion in ranges - * for RangeQuerys. + * @return whether or not to analyze range terms when constructing RangeQuerys. 
*/
-  public Collator getRangeCollator() {
-    return rangeCollator;
+  public boolean getAnalyzeRangeTerms() {
+    return analyzeRangeTerms;
   }
 
   protected void addClause(List clauses, int conj, int mods, Query q) {
@@ -873,6 +867,36 @@
     return new FuzzyQuery(term,minimumSimilarity,prefixLength);
   }
 
+  /** Analyzes one range endpoint into the single term the analyzer emits for it, as raw bytes. */
+  private BytesRef analyzeRangePart(String field, String part) {
+    TokenStream source;
+    try {
+      source = analyzer.reusableTokenStream(field, new StringReader(part));
+      source.reset();
+    } catch (IOException e) {
+      // reuse failed: fall back to a fresh (non-reused) stream
+      source = analyzer.tokenStream(field, new StringReader(part));
+    }
+
+    BytesRef result = new BytesRef();
+    try {
+      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
+      if (!source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
+      termAtt.toBytesRef(result);
+      if (source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
+    } catch (IOException e) {
+      throw new RuntimeException("error analyzing range part: " + part, e);
+    } finally {
+      // close on every path, including the exceptions thrown above
+      try {
+        source.close();
+      } catch (IOException ignored) {}
+    }
+    return result;
+  }
+
   /**
    * Builds a new TermRangeQuery instance
    * @param field Field
@@ -882,7 +906,14 @@
    * @return new TermRangeQuery instance
    */
   protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
-    final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
+    // analyzed terms (e.g. collation keys) may not be valid UTF-8: pass them as BytesRef, never via a lossy String
+    final BytesRef lower = analyzeRangeTerms && part1 != null ? analyzeRangePart(field, part1) : null;
+    final BytesRef upper = analyzeRangeTerms && part2 != null ? analyzeRangePart(field, part2) : null;
+    final TermRangeQuery query;
+    if (analyzeRangeTerms)
+      query = new TermRangeQuery(field, lower, upper, inclusive, inclusive);
+    else
+      query = TermRangeQuery.newStringRange(field, part1, part2, inclusive, inclusive);
     query.setRewriteMethod(multiTermRewriteMethod);
     return query;
   }
@@ -1569,6 +1600,12 @@
     finally { jj_save(0, xla); }
   }
 
+  private 
boolean jj_3R_2() { + if (jj_scan_token(TERM)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + private boolean jj_3_1() { Token xsp; xsp = jj_scanpos; @@ -1585,12 +1622,6 @@ return false; } - private boolean jj_3R_2() { - if (jj_scan_token(TERM)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ Index: lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 986557) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -174,9 +174,9 @@ // maps field names to date resolutions Map fieldToDateResolution = null; - // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys. - Collator rangeCollator = null; + // Whether or not to analyze range terms when constructing RangeQuerys + // (For example, analyzing terms into collation keys for locale-sensitive RangeQuery) + boolean analyzeRangeTerms = false; /** @deprecated remove when getFieldQuery is removed */ private static final VirtualMethod getFieldQueryMethod = @@ -502,27 +502,21 @@ } /** - * Sets the collator used to determine index term inclusion in ranges - * for RangeQuerys. - *

- * WARNING: Setting the rangeCollator to a non-null - * collator using this method will cause every single index Term in the - * Field referenced by lowerTerm and/or upperTerm to be examined. - * Depending on the number of index Terms in this Field, the operation could - * be very slow. + * Set whether or not to analyze range terms when constructing RangeQuerys. + * For example, setting this to true can enable analyzing terms into + * collation keys for locale-sensitive RangeQuery. * - * @param rc the collator to use when constructing RangeQuerys + * @param analyzeRangeTerms whether or not terms should be analyzed for RangeQuerys */ - public void setRangeCollator(Collator rc) { - rangeCollator = rc; + public void setAnalyzeRangeTerms(boolean analyzeRangeTerms) { + this.analyzeRangeTerms = analyzeRangeTerms; } /** - * @return the collator used to determine index term inclusion in ranges - * for RangeQuerys. + * @return whether or not to analyze range terms when constructing RangeQuerys. 
*/ - public Collator getRangeCollator() { - return rangeCollator; + public boolean getAnalyzeRangeTerms() { + return analyzeRangeTerms; } protected void addClause(List clauses, int conj, int mods, Query q) { @@ -897,6 +891,36 @@ return new FuzzyQuery(term,minimumSimilarity,prefixLength); } + private BytesRef analyzeRangePart(String field, String part) { + TokenStream source; + + try { + source = analyzer.reusableTokenStream(field, new StringReader(part)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(part)); + } + + BytesRef result = new BytesRef(); + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + + try { + if (!source.incrementToken()) + throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); + termAtt.toBytesRef(result); + if (source.incrementToken()) + throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part); + } catch (IOException e) { + throw new RuntimeException("error analyzing range part: " + part, e); + } + + try { + source.close(); + } catch (IOException ignored) {} + + return result; + } + /** * Builds a new TermRangeQuery instance * @param field Field @@ -906,7 +930,14 @@ * @return new TermRangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + // TODO: support BytesRef in TermRangeQuery + if (analyzeRangeTerms && part1 != null) + part1 = analyzeRangePart(field, part1).utf8ToString(); + + if (analyzeRangeTerms && part2 != null) + part2 = analyzeRangePart(field, part2).utf8ToString(); + + final TermRangeQuery query = TermRangeQuery.newStringRange(field, part1, part2, inclusive, inclusive); query.setRewriteMethod(multiTermRewriteMethod); return query; } Index: 
lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (revision 986557) +++ lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (working copy) @@ -20,9 +20,6 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.BytesRef; -import java.text.Collator; -import java.util.Locale; - /** * Expert: Collects sorted results from Searchable's and collates them. * The elements put into this queue must be of type FieldDoc. @@ -35,11 +32,6 @@ volatile SortField[] fields = null; - // used in the case where the fields are sorted by locale - // based strings - volatile Collator[] collators = null; - - /** * Creates a hit queue sorted by the given list of fields. * @param fields Fieldable names, in priority order (highest priority first). @@ -60,7 +52,6 @@ */ void setFields (SortField[] fields) { this.fields = fields; - this.collators = hasCollators (fields); } @@ -69,24 +60,6 @@ return fields; } - - /** Returns an array of collators, possibly null. The collators - * correspond to any SortFields which were given a specific locale. - * @param fields Array of sort fields. - * @return Array, possibly null. - */ - private Collator[] hasCollators (final SortField[] fields) { - if (fields == null) return null; - Collator[] ret = new Collator[fields.length]; - for (int i=0; ia is less relevant than b. * @param a ScoreDoc @@ -109,11 +82,9 @@ c = (s2 == null) ? 
0 : -1; } else if (s2 == null) { c = 1; - } else if (fields[i].getLocale() == null) { - c = s1.compareTo(s2); } else { - c = collators[i].compare(s1.utf8ToString(), s2.utf8ToString()); - } + c = s1.compareTo(s2); + } } else { c = docA.fields[i].compareTo(docB.fields[i]); if (type == SortField.SCORE) { Index: lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 986557) +++ lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.text.Collator; import java.util.Comparator; import org.apache.lucene.index.IndexReader; @@ -32,10 +31,6 @@ * greater than all that precede it.

*/ public class TermRangeTermsEnum extends FilteredTermsEnum { - - private Collator collator; - private String upperTermText; - private String lowerTermText; private boolean includeLower; private boolean includeUpper; final private BytesRef lowerBytesRef; @@ -54,79 +49,60 @@ * @param reader * @param field * An interned field that holds both lower and upper terms. - * @param lowerTermText + * @param lowerTerm * The term text at the lower end of the range - * @param upperTermText + * @param upperTerm * The term text at the upper end of the range * @param includeLower * If true, the lowerTerm is included in the range. * @param includeUpper * If true, the upperTerm is included in the range. - * @param collator - * The collator to use to collate index Terms, to determine their - * membership in the range bounded by lowerTerm and - * upperTerm. * * @throws IOException */ - public TermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText, - boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + public TermRangeTermsEnum(IndexReader reader, String field, BytesRef lowerTerm, BytesRef upperTerm, + boolean includeLower, boolean includeUpper) throws IOException { super(reader, field); - this.collator = collator; - this.upperTermText = upperTermText; - this.lowerTermText = lowerTermText; this.includeLower = includeLower; this.includeUpper = includeUpper; // do a little bit of normalization... // open ended range queries should always be inclusive. 
- if (this.lowerTermText == null) { - this.lowerTermText = ""; + if (lowerTerm == null) { + this.lowerBytesRef = new BytesRef(); this.includeLower = true; + } else { + this.lowerBytesRef = lowerTerm; } - lowerBytesRef = new BytesRef(this.lowerTermText); - if (this.upperTermText == null) { + if (upperTerm == null) { this.includeUpper = true; upperBytesRef = null; } else { - upperBytesRef = new BytesRef(upperTermText); + upperBytesRef = upperTerm; } - BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef(""); + BytesRef startBytesRef = lowerBytesRef; setInitialSeekTerm(startBytesRef); termComp = getComparator(); } @Override protected AcceptStatus accept(BytesRef term) { - if (collator == null) { - if (!this.includeLower && term.equals(lowerBytesRef)) - return AcceptStatus.NO; - // Use this field's default sort ordering - if (upperBytesRef != null) { - final int cmp = termComp.compare(upperBytesRef, term); - /* - * if beyond the upper term, or is exclusive and this is equal to - * the upper term, break out - */ - if ((cmp < 0) || - (!includeUpper && cmp==0)) { - return AcceptStatus.END; - } - } - return AcceptStatus.YES; - } else { - if ((includeLower - ? collator.compare(term.utf8ToString(), lowerTermText) >= 0 - : collator.compare(term.utf8ToString(), lowerTermText) > 0) - && (upperTermText == null - || (includeUpper - ? 
collator.compare(term.utf8ToString(), upperTermText) <= 0 - : collator.compare(term.utf8ToString(), upperTermText) < 0))) { - return AcceptStatus.YES; - } + if (!this.includeLower && term.equals(lowerBytesRef)) return AcceptStatus.NO; + // Use this field's default sort ordering + if (upperBytesRef != null) { + final int cmp = termComp.compare(upperBytesRef, term); + /* + * if beyond the upper term, or is exclusive and this is equal to + * the upper term, break out + */ + if ((cmp < 0) || + (!includeUpper && cmp==0)) { + return AcceptStatus.END; + } } + return AcceptStatus.YES; } } Index: lucene/src/java/org/apache/lucene/search/TermRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (revision 986557) +++ lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.search; +import org.apache.lucene.util.BytesRef; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -17,15 +19,13 @@ * limitations under the License. */ -import java.text.Collator; - /** * A Filter that restricts search results to a range of term * values in a given field. * *

This filter matches the documents looking for terms that fall into the * supplied range according to {@link - * String#compareTo(String)}, unless a Collator is provided. It is not intended + * Byte#compareTo(Byte)}. It is not intended * for numerical ranges; use {@link NumericRangeFilter} instead. * *

If you construct a large number of range filters with different ranges but on the @@ -44,39 +44,25 @@ * lowerTerm is null and includeLower is true (similar for upperTerm * and includeUpper) */ - public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm, + public TermRangeFilter(String fieldName, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) { super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper)); } /** - * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The lower bound on this range - * @param upperTerm The upper bound on this range - * @param includeLower Does this range include the lower bound? - * @param includeUpper Does this range include the upper bound? - * @param collator The collator to use when determining range inclusion; set - * to null to use Unicode code point ordering instead of collation. - * @throws IllegalArgumentException if both terms are null or if - * lowerTerm is null and includeLower is true (similar for upperTerm - * and includeUpper) + * Factory that creates a new TermRangeFilter using Strings for term text. */ - public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm, - boolean includeLower, boolean includeUpper, - Collator collator) { - super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); + public static TermRangeFilter newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm); + BytesRef upper = upperTerm == null ? 
null : new BytesRef(upperTerm); + return new TermRangeFilter(field, lower, upper, includeLower, includeUpper); } - + /** * Constructs a filter for field fieldName matching * less than or equal to upperTerm. */ - public static TermRangeFilter Less(String fieldName, String upperTerm) { + public static TermRangeFilter Less(String fieldName, BytesRef upperTerm) { return new TermRangeFilter(fieldName, null, upperTerm, false, true); } @@ -84,22 +70,19 @@ * Constructs a filter for field fieldName matching * greater than or equal to lowerTerm. */ - public static TermRangeFilter More(String fieldName, String lowerTerm) { + public static TermRangeFilter More(String fieldName, BytesRef lowerTerm) { return new TermRangeFilter(fieldName, lowerTerm, null, true, false); } /** Returns the lower value of this range filter */ - public String getLowerTerm() { return query.getLowerTerm(); } + public BytesRef getLowerTerm() { return query.getLowerTerm(); } /** Returns the upper value of this range filter */ - public String getUpperTerm() { return query.getUpperTerm(); } + public BytesRef getUpperTerm() { return query.getUpperTerm(); } /** Returns true if the lower endpoint is inclusive */ public boolean includesLower() { return query.includesLower(); } /** Returns true if the upper endpoint is inclusive */ public boolean includesUpper() { return query.includesUpper(); } - - /** Returns the collator used to determine range inclusion, if any. 
*/ - public Collator getCollator() { return query.getCollator(); } } Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 986557) +++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -18,12 +18,12 @@ */ import java.io.IOException; -import java.text.Collator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; /** @@ -31,7 +31,7 @@ * *

This query matches the documents looking for terms that fall into the * supplied range according to {@link - * String#compareTo(String)}, unless a Collator is provided. It is not intended + * Byte#compareTo(Byte)}. It is not intended * for numerical ranges; use {@link NumericRangeQuery} instead. * *

This query uses the {@link @@ -41,13 +41,11 @@ */ public class TermRangeQuery extends MultiTermQuery { - private String lowerTerm; - private String upperTerm; - private Collator collator; + private BytesRef lowerTerm; + private BytesRef upperTerm; private boolean includeLower; private boolean includeUpper; - /** * Constructs a query selecting all terms greater/equal than lowerTerm * but less/equal than upperTerm. @@ -70,78 +68,48 @@ * If true, the upperTerm is * included in the range. */ - public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { - this(field, lowerTerm, upperTerm, includeLower, includeUpper, null); - } - - /** Constructs a query selecting all terms greater/equal than - * lowerTerm but less/equal than upperTerm. - *

- * If an endpoint is null, it is said - * to be "open". Either or both endpoints may be open. Open endpoints may not - * be exclusive (you can't select all but the first or last term without - * explicitly specifying the term to exclude.) - *

- * If collator is not null, it will be used to decide whether - * index terms are within the given range, rather than using the Unicode code - * point order in which index terms are stored. - *

- * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The Term text at the lower end of the range - * @param upperTerm The Term text at the upper end of the range - * @param includeLower - * If true, the lowerTerm is - * included in the range. - * @param includeUpper - * If true, the upperTerm is - * included in the range. - * @param collator The collator to use to collate index Terms, to determine - * their membership in the range bounded by lowerTerm and - * upperTerm. - */ - public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, - Collator collator) { + public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) { super(field); this.lowerTerm = lowerTerm; this.upperTerm = upperTerm; this.includeLower = includeLower; this.includeUpper = includeUpper; - this.collator = collator; } + + /** + * Factory that creates a new TermRangeQuery using Strings for term text. + */ + public static TermRangeQuery newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm); + BytesRef upper = upperTerm == null ? 
null : new BytesRef(upperTerm); + return new TermRangeQuery(field, lower, upper, includeLower, includeUpper); + } /** Returns the lower value of this range query */ - public String getLowerTerm() { return lowerTerm; } + public BytesRef getLowerTerm() { return lowerTerm; } /** Returns the upper value of this range query */ - public String getUpperTerm() { return upperTerm; } + public BytesRef getUpperTerm() { return upperTerm; } /** Returns true if the lower endpoint is inclusive */ public boolean includesLower() { return includeLower; } /** Returns true if the upper endpoint is inclusive */ public boolean includesUpper() { return includeUpper; } - - /** Returns the collator used to determine range inclusion, if any. */ - public Collator getCollator() { return collator; } @Override protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { - if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { + if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { return TermsEnum.EMPTY; } - if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) { + if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) { // NOTE: for now, MultiTermQuery enums terms at the // MultiReader level, so we must use MultiFields here: final Terms terms = MultiFields.getTerms(reader, field); return (terms != null) ? terms.iterator() : null; } return new TermRangeTermsEnum(reader, field, - lowerTerm, upperTerm, includeLower, includeUpper, collator); + lowerTerm, upperTerm, includeLower, includeUpper); } /** @deprecated */ @@ -159,9 +127,9 @@ buffer.append(":"); } buffer.append(includeLower ? '[' : '{'); - buffer.append(lowerTerm != null ? lowerTerm : "*"); + buffer.append(lowerTerm != null ? lowerTerm.utf8ToString() : "*"); buffer.append(" TO "); - buffer.append(upperTerm != null ? 
upperTerm : "*"); + buffer.append(upperTerm != null ? upperTerm.utf8ToString() : "*"); buffer.append(includeUpper ? ']' : '}'); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); @@ -171,7 +139,6 @@ public int hashCode() { final int prime = 31; int result = super.hashCode(); - result = prime * result + ((collator == null) ? 0 : collator.hashCode()); result = prime * result + (includeLower ? 1231 : 1237); result = prime * result + (includeUpper ? 1231 : 1237); result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); @@ -188,11 +155,6 @@ if (getClass() != obj.getClass()) return false; TermRangeQuery other = (TermRangeQuery) obj; - if (collator == null) { - if (other.collator != null) - return false; - } else if (!collator.equals(other.collator)) - return false; if (includeLower != other.includeLower) return false; if (includeUpper != other.includeUpper) @@ -209,5 +171,4 @@ return false; return true; } - } Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 986557) +++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -618,83 +618,6 @@ } } - /** Sorts by a field's value using the Collator for a - * given Locale. - * - *

WARNING: this is likely very slow; you'll - * get much better performance using the - * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */ - public static final class StringComparatorLocale extends FieldComparator { - - private final String[] values; - private DocTerms currentDocTerms; - private final String field; - final Collator collator; - private String bottom; - private final BytesRef tempBR = new BytesRef(); - - StringComparatorLocale(int numHits, String field, Locale locale) { - values = new String[numHits]; - this.field = field; - collator = Collator.getInstance(locale); - } - - @Override - public int compare(int slot1, int slot2) { - final String val1 = values[slot1]; - final String val2 = values[slot2]; - if (val1 == null) { - if (val2 == null) { - return 0; - } - return -1; - } else if (val2 == null) { - return 1; - } - return collator.compare(val1, val2); - } - - @Override - public int compareBottom(int doc) { - final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString(); - if (bottom == null) { - if (val2 == null) { - return 0; - } - return -1; - } else if (val2 == null) { - return 1; - } - return collator.compare(bottom, val2); - } - - @Override - public void copy(int slot, int doc) { - final BytesRef br = currentDocTerms.getTerm(doc, tempBR); - if (br == null) { - values[slot] = null; - } else { - values[slot] = br.utf8ToString(); - } - } - - @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - currentDocTerms = FieldCache.DEFAULT.getTerms(reader, field); - } - - @Override - public void setBottom(final int bottom) { - this.bottom = values[bottom]; - } - - @Override - public Comparable value(int slot) { - final String s = values[slot]; - return s == null ? null : new BytesRef(values[slot]); - } - } - /** Sorts by field's natural Term sort order, using * ordinals. 
This is functionally equivalent to {@link * TermValComparator}, but it first resolves the string Index: lucene/src/java/org/apache/lucene/search/SortField.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SortField.java (revision 986557) +++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Serializable; -import java.util.Locale; import org.apache.lucene.util.StringHelper; @@ -91,7 +90,6 @@ private String field; private int type; // defaults to determining type dynamically - private Locale locale; // defaults to "natural order" (no Locale) boolean reverse = false; // defaults to natural order private FieldCache.Parser parser; @@ -159,27 +157,6 @@ this.parser = parser; } - /** Creates a sort by terms in the given field sorted - * according to the given locale. - * @param field Name of field to sort by, cannot be null. - * @param locale Locale of values in the field. - */ - public SortField (String field, Locale locale) { - initFieldType(field, STRING); - this.locale = locale; - } - - /** Creates a sort, possibly in reverse, by terms in the given field sorted - * according to the given locale. - * @param field Name of field to sort by, cannot be null. - * @param locale Locale of values in the field. - */ - public SortField (String field, Locale locale, boolean reverse) { - initFieldType(field, STRING); - this.locale = locale; - this.reverse = reverse; - } - /** Creates a sort with a custom comparison function. * @param field Name of field to sort by; cannot be null. * @param comparator Returns a comparator for sorting hits. @@ -227,14 +204,6 @@ return type; } - /** Returns the Locale by which term values are interpreted. - * May return null if no Locale was specified. - * @return Locale, or null. 
- */ - public Locale getLocale() { - return locale; - } - /** Returns the instance of a {@link FieldCache} parser that fits to the given sort type. * May return null if no parser was specified. Sorting is using the default parser then. * @return An instance of a {@link FieldCache} parser, or null. @@ -310,7 +279,6 @@ break; } - if (locale != null) buffer.append('(').append(locale).append(')'); if (parser != null) buffer.append('(').append(parser).append(')'); if (reverse) buffer.append('!'); @@ -330,7 +298,6 @@ other.field == this.field // field is always interned && other.type == this.type && other.reverse == this.reverse - && (other.locale == null ? this.locale == null : other.locale.equals(this.locale)) && (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource)) && (other.parser == null ? this.parser == null : other.parser.equals(this.parser)) ); @@ -345,7 +312,6 @@ public int hashCode() { int hash=type^0x346565dd + Boolean.valueOf(reverse).hashCode()^0xaf5998bb; if (field != null) hash += field.hashCode()^0xff5685dd; - if (locale != null) hash += locale.hashCode()^0x08150815; if (comparatorSource != null) hash += comparatorSource.hashCode(); if (parser != null) hash += parser.hashCode()^0x3aaf56ff; return hash; @@ -371,14 +337,6 @@ * @return {@link FieldComparator} to use when sorting */ public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException { - - if (locale != null) { - // TODO: it'd be nice to allow FieldCache.getStringIndex - // to optionally accept a Locale so sorting could then use - // the faster StringComparator impls - return new FieldComparator.StringComparatorLocale(numHits, field, locale); - } - switch (type) { case SortField.SCORE: return new FieldComparator.RelevanceComparator(numHits); Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java 
=================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 986557) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.text.Collator; import java.text.DateFormat; import java.util.Calendar; import java.util.Date; @@ -653,56 +652,6 @@ "gack (bar blar {a TO z})"); } - public void testFarsiRangeCollating() throws Exception { - Random random = newRandom(); - MockRAMDirectory ramDir = newDirectory(random); - IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); - Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - iw.addDocument(doc); - iw.close(); - IndexSearcher is = new IndexSearcher(ramDir, true); - - StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the - // Farsi - // characters properly. - Collator c = Collator.getInstance(new Locale("ar")); - qp.setRangeCollator(c); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the - // single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not - // supported). 
- - // Test ConstantScoreRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"), - null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - // Test RangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - is.close(); - ramDir.close(); - } - /** for testing legacy DateField support */ private String getLegacyDate(String s) throws Exception { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (revision 986557) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.text.Collator; import java.text.DateFormat; import java.util.Arrays; import java.util.Calendar; @@ -651,53 +650,6 @@ assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); } - - public void testFarsiRangeCollating() throws Exception { - - MockRAMDirectory ramDir = newDirectory(newRandom()); - IndexWriter iw = new IndexWriter(ramDir, new 
MockAnalyzer(MockTokenizer.WHITESPACE, false), true, - IndexWriter.MaxFieldLength.LIMITED); - Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - iw.addDocument(doc); - iw.close(); - IndexSearcher is = new IndexSearcher(ramDir, true); - - QueryParserWrapper qp = new QueryParserWrapper("content", - new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator c = Collator.getInstance(new Locale("ar")); - qp.setRangeCollator(c); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is not - // supported). 
- - // Test ConstantScoreRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - // Test RangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - is.close(); - ramDir.close(); - } private String escapeDateString(String s) { if (s.contains(" ")) { Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. 
*/ -import java.text.Collator; import java.text.DateFormat; import java.util.Calendar; import java.util.Date; @@ -37,7 +36,6 @@ import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryParser.standard.config.DateResolutionAttribute; import org.apache.lucene.queryParser.standard.config.LocaleAttribute; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode; /** @@ -55,12 +53,7 @@ * If a {@link DateResolutionAttribute} is defined and the {@link Resolution} is * not null it will also be used to parse the date value.
*
- * This processor will also try to retrieve a {@link RangeCollatorAttribute} - * from the {@link QueryConfigHandler}. If a {@link RangeCollatorAttribute} is - * found and the {@link Collator} is not null, it's set on the - * {@link RangeQueryNode}.
* - * @see RangeCollatorAttribute * @see DateResolutionAttribute * @see LocaleAttribute * @see RangeQueryNode @@ -80,17 +73,9 @@ ParametricQueryNode upper = parametricRangeNode.getUpperBound(); ParametricQueryNode lower = parametricRangeNode.getLowerBound(); Locale locale = Locale.getDefault(); - Collator collator = null; DateTools.Resolution dateRes = null; boolean inclusive = false; - if (getQueryConfigHandler().hasAttribute(RangeCollatorAttribute.class)) { - - collator = getQueryConfigHandler().getAttribute( - RangeCollatorAttribute.class).getRangeCollator(); - - } - if (getQueryConfigHandler().hasAttribute(LocaleAttribute.class)) { locale = getQueryConfigHandler().getAttribute(LocaleAttribute.class) @@ -158,7 +143,7 @@ lower.setText(part1); upper.setText(part2); - return new RangeQueryNode(lower, upper, collator); + return new RangeQueryNode(lower, upper); } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. 
*/ -import java.text.Collator; import java.util.HashMap; import java.util.List; import java.util.Locale; @@ -45,7 +44,6 @@ import org.apache.lucene.queryParser.standard.config.LowercaseExpandedTermsAttribute; import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute; import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler; import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser; import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline; @@ -318,20 +316,6 @@ } - public Collator getRangeCollator() { - - if (this.config != null - && this.config.hasAttribute(RangeCollatorAttribute.class)) { - - return this.config.getAttribute(RangeCollatorAttribute.class) - .getRangeCollator(); - - } - - return null; - - } - public boolean getUseOldRangeQuery() { if (getMultiTermRewriteMethod() == MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { return true; @@ -421,10 +405,6 @@ this.qpHelper.setDefaultPhraseSlop(phraseSlop); } - public void setRangeCollator(Collator rc) { - this.qpHelper.setRangeCollator(rc); - } - public void setUseOldRangeQuery(boolean useOldRangeQuery) { if (useOldRangeQuery) { setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. 
*/ -import java.text.Collator; import java.util.Locale; import java.util.Map; import java.util.TooManyListenersException; @@ -41,10 +40,8 @@ import org.apache.lucene.queryParser.standard.config.MultiFieldAttribute; import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute; import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler; import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator; -import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode; import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser; import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline; import org.apache.lucene.search.FuzzyQuery; @@ -188,32 +185,6 @@ } /** - * Sets the collator used to determine index term inclusion in ranges for - * RangeQuerys. - *

- * WARNING: Setting the rangeCollator to a non-null collator - * using this method will cause every single index Term in the Field - * referenced by lowerTerm and/or upperTerm to be examined. Depending on the - * number of index Terms in this Field, the operation could be very slow. - * - * @param collator - * the collator to use when constructing {@link RangeQueryNode}s - */ - public void setRangeCollator(Collator collator) { - RangeCollatorAttribute attr = getQueryConfigHandler().getAttribute(RangeCollatorAttribute.class); - attr.setDateResolution(collator); - } - - /** - * @return the collator used to determine index term inclusion in ranges for - * RangeQuerys. - */ - public Collator getRangeCollator() { - RangeCollatorAttribute attr = getQueryConfigHandler().getAttribute(RangeCollatorAttribute.class); - return attr.getRangeCollator(); - } - - /** * Sets the boolean operator of the QueryParser. In default mode ( * {@link Operator#OR}) terms without any modifiers are considered optional: * for example capital of Hungary is equal to Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (working copy) @@ -1,94 +0,0 @@ -package org.apache.lucene.queryParser.standard.config; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.Collator; - -import org.apache.lucene.queryParser.core.config.QueryConfigHandler; -import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.AttributeImpl; - -/** - * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor - * and must be defined in the {@link QueryConfigHandler}. This attribute tells - * the processor which {@link Collator} should be used for a - * {@link TermRangeQuery}
- * - * @see org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute - */ -public class RangeCollatorAttributeImpl extends AttributeImpl - implements RangeCollatorAttribute { - - private static final long serialVersionUID = -6804360312723049526L; - - private Collator rangeCollator; - - public RangeCollatorAttributeImpl() { - rangeCollator = null; // default value for 2.4 - } - - public void setDateResolution(Collator rangeCollator) { - this.rangeCollator = rangeCollator; - } - - public Collator getRangeCollator() { - return this.rangeCollator; - } - - @Override - public void clear() { - throw new UnsupportedOperationException(); - } - - @Override - public void copyTo(AttributeImpl target) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean equals(Object other) { - - if (other instanceof RangeCollatorAttributeImpl) { - RangeCollatorAttributeImpl rangeCollatorAttr = (RangeCollatorAttributeImpl) other; - - if (rangeCollatorAttr.rangeCollator == this.rangeCollator - || rangeCollatorAttr.rangeCollator.equals(this.rangeCollator)) { - - return true; - - } - - } - - return false; - - } - - @Override - public int hashCode() { - return (this.rangeCollator == null) ? 0 : this.rangeCollator.hashCode(); - } - - @Override - public String toString() { - return ""; - } - -} Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttribute.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttribute.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttribute.java (working copy) @@ -1,37 +0,0 @@ -package org.apache.lucene.queryParser.standard.config; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.Collator; - -import org.apache.lucene.queryParser.core.config.QueryConfigHandler; -import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.Attribute; - -/** - * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor - * and must be defined in the {@link QueryConfigHandler}. This attribute tells - * the processor which {@link Collator} should be used for a - * {@link TermRangeQuery}
- * - */ -public interface RangeCollatorAttribute extends Attribute { - public void setDateResolution(Collator rangeCollator); - public Collator getRangeCollator(); -} Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/StandardQueryConfigHandler.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/StandardQueryConfigHandler.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/StandardQueryConfigHandler.java (working copy) @@ -38,7 +38,6 @@ addFieldConfigListener(new FieldDateResolutionFCListener(this)); // Default Values - addAttribute(RangeCollatorAttribute.class); addAttribute(DefaultOperatorAttribute.class); addAttribute(AnalyzerAttribute.class); addAttribute(FuzzyAttribute.class); Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RangeQueryNode.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RangeQueryNode.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RangeQueryNode.java (working copy) @@ -17,11 +17,8 @@ * limitations under the License. 
*/ -import java.text.Collator; - import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor; /** @@ -36,17 +33,12 @@ private static final long serialVersionUID = 7400866652044314657L; - private Collator collator; - /** * @param lower * @param upper */ - public RangeQueryNode(ParametricQueryNode lower, ParametricQueryNode upper, Collator collator) { + public RangeQueryNode(ParametricQueryNode lower, ParametricQueryNode upper) { super(lower, upper); - - this.collator = collator; - } @Override @@ -59,12 +51,4 @@ return sb.toString(); } - - /** - * @return the collator - */ - public Collator getCollator() { - return this.collator; - } - } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (working copy) @@ -53,9 +53,7 @@ String field = rangeNode.getField().toString(); - TermRangeQuery rangeQuery = new TermRangeQuery(field, lower - .getTextAsString(), upper.getTextAsString(), lowerInclusive, - upperInclusive, rangeNode.getCollator()); + TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive); MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod)queryNode.getTag(MultiTermRewriteMethodAttribute.TAG_ID); if (method != null) { Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java 
=================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (working copy) @@ -174,8 +174,8 @@ if (isPass2ResolvingPhrases) { // Must use old-style RangeQuery in order to produce a BooleanQuery // that can be turned into SpanOr clause - TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive, - getRangeCollator()); + // nocommit: analyze the range points if needed + TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, part1, part2, inclusive, inclusive); rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return rangeQuery; } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (working copy) @@ -426,7 +426,7 @@ } catch (Exception e) { } - return new TermRangeQuery(field, part1, part2, inclusive, inclusive); + return TermRangeQuery.newStringRange(field, part1, part2, inclusive, inclusive); } /** Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (revision 986557) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (working copy) @@ -450,7 +450,7 @@ } catch (Exception e) { } - return new TermRangeQuery(field, part1, 
part2, inclusive, inclusive); + return TermRangeQuery.newStringRange(field, part1, part2, inclusive, inclusive); } /** Index: lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java =================================================================== --- lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java (revision 986557) +++ lucene/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java (working copy) @@ -22,7 +22,6 @@ import java.io.IOException; import java.util.HashMap; import java.util.Iterator; -import java.util.Locale; import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; @@ -362,17 +361,7 @@ // up to this point, all of the searches should have "sane" // FieldCache behavior, and should have reused hte cache in several cases assertSaneFieldCaches(getName() + " Basics"); - // next we'll check an alternate Locale for string, so purge first FieldCache.DEFAULT.purgeAllCaches(); - - sort.setSort(new SortField ("string", Locale.US) ); - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - sort.setSort(new SortField ("string", Locale.US, true)); - assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - - assertSaneFieldCaches(getName() + " Locale.US"); - FieldCache.DEFAULT.purgeAllCaches(); } // make sure the documents returned by the search match the expected list Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/RangeFilterBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/RangeFilterBuilder.java (revision 986557) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/RangeFilterBuilder.java (working copy) @@ -41,7 +41,7 @@ String upperTerm=e.getAttribute("upperTerm"); boolean includeLower=DOMUtils.getAttribute(e,"includeLower",true); boolean includeUpper=DOMUtils.getAttribute(e,"includeUpper",true); - return new 
TermRangeFilter(fieldName,lowerTerm,upperTerm,includeLower,includeUpper); + return TermRangeFilter.newStringRange(fieldName,lowerTerm,upperTerm,includeLower,includeUpper); } } Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 986557) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -848,7 +848,7 @@ @Override public void run() throws Exception { numHighlights = 0; - TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true); + TermRangeFilter rf = TermRangeFilter.newStringRange("contents", "john", "john", true, true); SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")), new SpanTermQuery(new Term("contents", "kennedy")), }; SpanNearQuery snq = new SpanNearQuery(clauses, 1, true); @@ -871,7 +871,7 @@ @Override public void run() throws Exception { numHighlights = 0; - TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true); + TermRangeFilter rf = TermRangeFilter.newStringRange("contents", "john", "john", true, true); PhraseQuery pq = new PhraseQuery(); pq.add(new Term("contents", "john")); pq.add(new Term("contents", "kennedy")); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java (revision 986557) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java (working copy) @@ -90,7 +90,7 @@ //Date pastTheEnd = parseDate("2099 Jan 1"); // dateFilter = DateFilter.Before("date", pastTheEnd); // just treat dates as strings and select the whole range for now... 
- dateFilter = new TermRangeFilter("date","","ZZZZ",true,true); + dateFilter = TermRangeFilter.newStringRange("date","","ZZZZ",true,true); bobFilter = new QueryWrapperFilter( new TermQuery(new Term("owner", "bob"))); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java (revision 986557) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java (working copy) @@ -73,7 +73,7 @@ private Filter getRangeFilter(String field,String lowerPrice, String upperPrice) { - Filter f = new TermRangeFilter(field,lowerPrice,upperPrice,true,true); + Filter f = TermRangeFilter.newStringRange(field,lowerPrice,upperPrice,true,true); return f; } private Filter getTermsFilter(String field,String text) Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java (revision 0) @@ -0,0 +1,104 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; + +/** + * Subclass of FilteredTermsEnum for enumerating all terms that match the + * specified range parameters. + *

Term enumerations are always ordered by + * {@link #getComparator}. Each term in the enumeration is + * greater than all that precede it.

+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum { + private Collator collator; + private String upperTermText; + private String lowerTermText; + private boolean includeLower; + private boolean includeUpper; + + /** + * Enumerates all terms greater/equal than lowerTerm + * but less/equal than upperTerm, as ordered by the given collator. + * + * If an endpoint is null, it is said to be "open". Either or both + * endpoints may be open. Open endpoints may not be exclusive + * (you can't select all but the first or last term without + * explicitly specifying the term to exclude.) + * + * @param reader The reader passed on to the FilteredTermsEnum superclass + * @param field + * An interned field that holds both lower and upper terms. + * @param lowerTermText + * The term text at the lower end of the range + * @param upperTermText + * The term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is included in the range. + * @param includeUpper + * If true, the upperTerm is included in the range. + * @param collator + * The collator to use to collate index Terms, to determine their + * membership in the range bounded by lowerTerm and + * upperTerm. + * + * @throws IOException presumably from the superclass constructor; nothing here throws it directly + */ + public SlowCollatedTermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText, + boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + super(reader, field); + this.collator = collator; + this.upperTermText = upperTermText; + this.lowerTermText = lowerTermText; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + + // do a little bit of normalization... + // open ended range queries should always be inclusive. 
+ if (this.lowerTermText == null) { + this.lowerTermText = ""; + this.includeLower = true; + } + + // TODO: optimize; the scan currently starts at the empty term instead of seeking near lowerTermText + BytesRef startBytesRef = new BytesRef(""); + setInitialSeekTerm(startBytesRef); + } + + @Override + protected AcceptStatus accept(BytesRef term) { + final String text = term.utf8ToString(); // decode once; both bound checks reuse it + final int lowerCmp = collator.compare(text, lowerTermText); + if (includeLower ? lowerCmp < 0 : lowerCmp <= 0) { + return AcceptStatus.NO; + } + if (upperTermText == null) { + return AcceptStatus.YES; + } + final int upperCmp = collator.compare(text, upperTermText); + return (includeUpper ? upperCmp <= 0 : upperCmp < 0) ? AcceptStatus.YES : AcceptStatus.NO; + } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeTermsEnum.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0) @@ -0,0 +1,70 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.Collator; + +/** + * A Filter that restricts search results to a range of term + * values in a given field. + * + *

This filter matches the documents looking for terms that fall into the + * supplied range according to the ordering of the supplied {@link + * Collator}, not {@link String#compareTo(String)}. It is not intended + * for numerical ranges; use {@link NumericRangeFilter} instead. + * + *

If you construct a large number of range filters with different ranges but on the + * same field, {@link FieldCacheRangeFilter} may have significantly better performance. + * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter<SlowCollatedTermRangeQuery> { + /** + * Wraps a {@link SlowCollatedTermRangeQuery} built from the arguments, which are passed + * through unchanged and unvalidated; a null bound leaves that end of the range open. + * + * @param fieldName The field whose terms are tested against the range + * @param lowerTerm The lower bound on this range + * @param upperTerm The upper bound on this range + * @param includeLower Does this range include the lower bound? + * @param includeUpper Does this range include the upper bound? + * @param collator The collator to use when determining range inclusion; the wrapped + * query calls it without a null check, so it must not be null. + */ + public SlowCollatedTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, + boolean includeLower, boolean includeUpper, + Collator collator) { + super(new SlowCollatedTermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); + } + + /** Returns the lower value of this range filter */ + public String getLowerTerm() { return query.getLowerTerm(); } + + /** Returns the upper value of this range filter */ + public String getUpperTerm() { return query.getUpperTerm(); } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return query.includesLower(); } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return query.includesUpper(); } + + /** Returns the collator used to determine range inclusion. 
*/ + public Collator getCollator() { return query.getCollator(); } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0) @@ -0,0 +1,177 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ToStringUtils; + +/** + * A Query that matches documents within a range of terms. 
+ * + *

This query matches the documents looking for terms that fall into the + * supplied range according to {@link + * String#compareTo(String)}, unless a Collator is provided. It is not intended + * for numerical ranges; use {@link NumericRangeQuery} instead. + * + *

This query uses the {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * rewrite method. + * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeQuery extends MultiTermQuery { + private String lowerTerm; + private String upperTerm; + private boolean includeLower; + private boolean includeUpper; + private Collator collator; + + /** Constructs a query selecting all terms greater/equal than + * lowerTerm but less/equal than upperTerm. + *

+ * If an endpoint is null, it is said + * to be "open". Either or both endpoints may be open. Open endpoints may not + * be exclusive (you can't select all but the first or last term without + * explicitly specifying the term to exclude.) + *

+ * @param field The name of the field whose terms are matched; passed to the MultiTermQuery superclass + * @param lowerTerm The Term text at the lower end of the range + * @param upperTerm The Term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is + * included in the range. + * @param includeUpper + * If true, the upperTerm is + * included in the range. + * @param collator The collator to use to collate index Terms, to determine + * their membership in the range bounded by lowerTerm and + * upperTerm. + */ + public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm, + boolean includeLower, boolean includeUpper, Collator collator) { + super(field); + this.lowerTerm = lowerTerm; + this.upperTerm = upperTerm; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.collator = collator; + } + + /** Returns the lower value of this range query */ + public String getLowerTerm() { return lowerTerm; } + + /** Returns the upper value of this range query */ + public String getUpperTerm() { return upperTerm; } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return includeLower; } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return includeUpper; } + + /** Returns the collator used to determine range inclusion */ + public Collator getCollator() { return collator; } + + @Override + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) { // NOTE(review): collator is dereferenced without a null check; confirm callers never pass null + return TermsEnum.EMPTY; // lower bound collates after the upper bound: return the empty enum + } + if (lowerTerm == null && upperTerm == null) { // fully open range: hand back the field's own terms iterator + // NOTE: debatably, the caller should never pass in a + // multi reader... + final Terms terms = MultiFields.getTerms(reader, field); + return (terms != null) ? 
terms.iterator() : null; + } + return new SlowCollatedTermRangeTermsEnum(reader, field, + lowerTerm, upperTerm, includeLower, includeUpper, collator); + } + + /** @deprecated Use {@link #getField()} instead. */ + @Deprecated + public String field() { + return getField(); + } + + /** Prints a user-readable version of this query: the field name and a colon (omitted when it equals the given field), then the range with square brackets for inclusive and curly braces for exclusive endpoints and an asterisk for an open endpoint, then the boost as formatted by ToStringUtils.boost. */ + @Override + public String toString(String field) { + StringBuilder buffer = new StringBuilder(); + if (!getField().equals(field)) { + buffer.append(getField()); + buffer.append(":"); + } + buffer.append(includeLower ? '[' : '{'); + buffer.append(lowerTerm != null ? lowerTerm : "*"); + buffer.append(" TO "); + buffer.append(upperTerm != null ? upperTerm : "*"); + buffer.append(includeUpper ? ']' : '}'); + buffer.append(ToStringUtils.boost(getBoost())); + return buffer.toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((collator == null) ? 0 : collator.hashCode()); + result = prime * result + (includeLower ? 1231 : 1237); + result = prime * result + (includeUpper ? 1231 : 1237); + result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); + result = prime * result + ((upperTerm == null) ? 
0 : upperTerm.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { // equal when super.equals() holds and collator, both inclusion flags and both terms match + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + SlowCollatedTermRangeQuery other = (SlowCollatedTermRangeQuery) obj; + if (collator == null) { + if (other.collator != null) + return false; + } else if (!collator.equals(other.collator)) + return false; + if (includeLower != other.includeLower) + return false; + if (includeUpper != other.includeUpper) + return false; + if (lowerTerm == null) { + if (other.lowerTerm != null) + return false; + } else if (!lowerTerm.equals(other.lowerTerm)) + return false; + if (upperTerm == null) { + if (other.upperTerm != null) + return false; + } else if (!upperTerm.equals(other.upperTerm)) + return false; + return true; + } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeQuery.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0) @@ -0,0 +1,105 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache.DocTerms; +import org.apache.lucene.util.BytesRef; + +/** Sorts by a field's value using the given Collator + * + *

WARNING: this is very slow; you'll + * get much better performance using the + * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. + * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public final class SlowCollatedStringComparator extends FieldComparator { + + private final String[] values; // per-slot values decoded to Strings; null when getTerm() returned null + private DocTerms currentDocTerms; // terms of the reader last passed to setNextReader() + private final String field; + final Collator collator; + private String bottom; // value of the slot selected by setBottom() + private final BytesRef tempBR = new BytesRef(); // scratch buffer handed to getTerm() + + public SlowCollatedStringComparator(int numHits, String field, Collator collator) { // public so code outside this package can construct the comparator + values = new String[numHits]; + this.field = field; + this.collator = collator; + } + + @Override + public int compare(int slot1, int slot2) { // null values order before non-null ones; otherwise the collator decides + final String val1 = values[slot1]; + final String val2 = values[slot2]; + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return collator.compare(val1, val2); + } + + @Override + public int compareBottom(int doc) { + // copy() treats a null result from getTerm() as "no value"; do the same here + // instead of calling utf8ToString() on it and risking a NullPointerException. + final BytesRef br = currentDocTerms.getTerm(doc, tempBR); + final String val2 = br == null ? null : br.utf8ToString(); + if (bottom == null) { + return val2 == null ? 0 : -1; + } else if (val2 == null) { + return 1; + } + return collator.compare(bottom, val2); + } + + @Override + public void copy(int slot, int doc) { + final BytesRef br = currentDocTerms.getTerm(doc, tempBR); + if (br == null) { + values[slot] = null; + } else { + values[slot] = br.utf8ToString(); + } + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + currentDocTerms = FieldCache.DEFAULT.getTerms(reader, field); + } + + @Override + public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override + public Comparable value(int slot) { + final String s = values[slot]; + return s == null ? 
null : new BytesRef(values[slot]); + } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedStringComparator.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 986557) +++ lucene/MIGRATE.txt (working copy) @@ -266,3 +266,7 @@ Likewise for DocsAndPositionsEnum. +LUCENE-2514: The option to use a Collator's order (instead of Unicode order) for +TermRangeQuery/Filter has been moved to SlowCollatedTermRangeQuery/Filter in contrib/queries. +Note: this functionality isn't very scalable and if you are using it, consider +indexing collation keys with the collation support in the analysis module instead.