Index: solr/src/java/org/apache/solr/schema/DateField.java =================================================================== --- solr/src/java/org/apache/solr/schema/DateField.java (revision 1075074) +++ solr/src/java/org/apache/solr/schema/DateField.java (working copy) @@ -410,7 +410,7 @@ /** DateField specific range query */ public Query getRangeQuery(QParser parser, SchemaField sf, Date part1, Date part2, boolean minInclusive, boolean maxInclusive) { - return new TermRangeQuery( + return TermRangeQuery.newStringRange( sf.getName(), part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2), Index: solr/src/java/org/apache/solr/schema/FieldType.java =================================================================== --- solr/src/java/org/apache/solr/schema/FieldType.java (revision 1075074) +++ solr/src/java/org/apache/solr/schema/FieldType.java (working copy) @@ -521,7 +521,7 @@ */ public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { // constant score mode is now enabled per default - return new TermRangeQuery( + return TermRangeQuery.newStringRange( field.getName(), part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2), Index: solr/src/java/org/apache/solr/search/QueryParsing.java =================================================================== --- solr/src/java/org/apache/solr/search/QueryParsing.java (revision 1075074) +++ solr/src/java/org/apache/solr/search/QueryParsing.java (working copy) @@ -398,8 +398,8 @@ String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesLower() ? 
'[' : '{'); - String lt = q.getLowerTerm(); - String ut = q.getUpperTerm(); + String lt = q.getLowerTerm() == null ? null : q.getLowerTerm().utf8ToString(); + String ut = q.getUpperTerm() == null ? null : q.getUpperTerm().utf8ToString(); if (lt == null) { out.append('*'); } else { Index: solr/src/java/org/apache/solr/handler/component/ShardDoc.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/ShardDoc.java (revision 1075074) +++ solr/src/java/org/apache/solr/handler/component/ShardDoc.java (working copy) @@ -95,10 +95,10 @@ String fieldname = fields[i].getField(); comparators[i] = getCachedComparator(fieldname, fields[i] - .getType(), fields[i].getLocale(), fields[i].getComparatorSource()); + .getType(), fields[i].getComparatorSource()); if (fields[i].getType() == SortField.STRING) { - this.fields[i] = new SortField(fieldname, fields[i].getLocale(), + this.fields[i] = new SortField(fieldname, SortField.STRING, fields[i].getReverse()); } else { this.fields[i] = new SortField(fieldname, fields[i].getType(), @@ -145,17 +145,14 @@ return c < 0; } - Comparator getCachedComparator(String fieldname, int type, Locale locale, FieldComparatorSource factory) { + Comparator getCachedComparator(String fieldname, int type, FieldComparatorSource factory) { Comparator comparator = null; switch (type) { case SortField.SCORE: comparator = comparatorScore(fieldname); break; case SortField.STRING: - if (locale != null) - comparator = comparatorStringLocale(fieldname, locale); - else - comparator = comparatorNatural(fieldname); + comparator = comparatorNatural(fieldname); break; case SortField.CUSTOM: if (factory instanceof MissingStringLastComparatorSource){ Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (revision 1075074) +++ 
modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (working copy) @@ -20,6 +20,8 @@ import com.ibm.icu.text.Collator; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.util.BytesRef; import java.util.Locale; @@ -27,17 +29,23 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase { private Collator collator = Collator.getInstance(new Locale("fa")); - private Analyzer analyzer = new ICUCollationKeyAnalyzer(collator); + private Analyzer analyzer = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator); - private String firstRangeBeginning = encodeCollationKey + private BytesRef firstRangeBeginning = new BytesRef (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); - private String firstRangeEnd = encodeCollationKey + private BytesRef firstRangeEnd = new BytesRef (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); - private String secondRangeBeginning = encodeCollationKey + private BytesRef secondRangeBeginning = new BytesRef (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); - private String secondRangeEnd = encodeCollationKey + private BytesRef secondRangeEnd = new BytesRef (collator.getCollationKey(secondRangeEndOriginal).toByteArray()); - + + @Override + public void setUp() throws Exception { + super.setUp(); + assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec())); + } + public void testFarsiRangeFilterCollating() throws Exception { testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd); @@ -62,13 +70,13 @@ // public void testCollationKeySort() throws Exception { Analyzer usAnalyzer = new ICUCollationKeyAnalyzer - (Collator.getInstance(Locale.US)); + (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US)); Analyzer franceAnalyzer = new 
ICUCollationKeyAnalyzer - (Collator.getInstance(Locale.FRANCE)); + (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE)); Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer - (Collator.getInstance(new Locale("sv", "se"))); + (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se"))); Analyzer denmarkAnalyzer = new ICUCollationKeyAnalyzer - (Collator.getInstance(new Locale("da", "dk"))); + (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk"))); // The ICU Collator and java.text.Collator implementations differ in their // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US. Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (revision 1075074) +++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (working copy) @@ -22,24 +22,26 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.util.BytesRef; import java.io.Reader; import java.util.Locale; - +/** @deprecated remove this when ICUCollationKeyFilter is removed */ +@Deprecated public class TestICUCollationKeyFilter extends CollationTestBase { private Collator collator = Collator.getInstance(new Locale("fa")); private Analyzer analyzer = new TestAnalyzer(collator); - private String firstRangeBeginning = encodeCollationKey - (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); - private String firstRangeEnd = encodeCollationKey - (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); - private String secondRangeBeginning = encodeCollationKey - (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); - private String secondRangeEnd = encodeCollationKey - 
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()); + private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey + (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray())); + private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey + (collator.getCollationKey(firstRangeEndOriginal).toByteArray())); + private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey + (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray())); + private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey + (collator.getCollationKey(secondRangeEndOriginal).toByteArray())); public final class TestAnalyzer extends Analyzer { Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java (revision 0) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java (revision 0) @@ -0,0 +1,96 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; + +import com.ibm.icu.text.Collator; + +/** + *

+ * Converts each token into its {@link com.ibm.icu.text.CollationKey}, and + * then encodes bytes as an index term. + *

+ *

+ * WARNING: Make sure you use exactly the same Collator at + * index and query time -- CollationKeys are only comparable when produced by + * the same Collator. {@link com.ibm.icu.text.RuleBasedCollator}s are + * independently versioned, so it is safe to search against stored + * CollationKeys if the following are exactly the same (best practice is + * to store this information with the index and check that they remain the + * same at query time): + *

+ *
    + *
  1. + * Collator version - see {@link Collator#getVersion()} + *
  2. + *
  3. + * The collation strength used - see {@link Collator#setStrength(int)} + *
  4. + *
+ *

+ * CollationKeys generated by ICU Collators are not compatible with those + * generated by java.text.Collators. Specifically, if you use + * ICUCollationAttributeFactory to generate index terms, do not use + * {@link CollationAttributeFactory} on the query side, or vice versa. + *

+ *

+ * ICUCollationAttributeFactory is significantly faster and generates significantly + * shorter keys than CollationAttributeFactory. See + * http://site.icu-project.org/charts/collation-icu4j-sun for key + * generation timing and key length comparisons between ICU4J and + * java.text.Collator over several languages. + *

+ */ +public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory { + private final Collator collator; + private final AttributeSource.AttributeFactory delegate; + + /** + * Create an ICUCollationAttributeFactory, using + * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the + * factory for all other attributes. + * @param collator CollationKey generator + */ + public ICUCollationAttributeFactory(Collator collator) { + this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator); + } + + /** + * Create an ICUCollationAttributeFactory, using the supplied Attribute + * Factory as the factory for all other attributes. + * @param delegate Attribute Factory + * @param collator CollationKey generator + */ + public ICUCollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) { + this.delegate = delegate; + this.collator = collator; + } + + @Override + public AttributeImpl createAttributeInstance( + Class attClass) { + return attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class) + ? 
new ICUCollatedTermAttributeImpl(collator) + : delegate.createAttributeInstance(attClass); + } +} Property changes on: modules\analysis\icu\src\java\org\apache\lucene\collation\ICUCollationAttributeFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (revision 1075074) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (working copy) @@ -19,24 +19,20 @@ import com.ibm.icu.text.Collator; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.Tokenizer; - +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.collation.CollationKeyAnalyzer; // javadocs +import org.apache.lucene.util.Version; import java.io.Reader; -import java.io.IOException; - /** *

* Filters {@link KeywordTokenizer} with {@link ICUCollationKeyFilter}. *

* Converts the token into its {@link com.ibm.icu.text.CollationKey}, and - * then encodes the CollationKey with - * {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow it to + * then encodes the CollationKey either directly or with + * {@link IndexableBinaryStringTools} (see below), to allow it to * be stored as an index term. *

*

@@ -70,39 +66,48 @@ * generation timing and key length comparisons between ICU4J and * java.text.Collator over several languages. *

+ * + *

You must specify the required {@link Version} + * compatibility when creating ICUCollationKeyAnalyzer: + *

*/ -public final class ICUCollationKeyAnalyzer extends Analyzer { - private Collator collator; +public final class ICUCollationKeyAnalyzer extends ReusableAnalyzerBase { + private final Collator collator; + private final ICUCollationAttributeFactory factory; + private final Version matchVersion; - public ICUCollationKeyAnalyzer(Collator collator) { + /** + * Create a new ICUCollationKeyAnalyzer, using the specified collator. + * + * @param matchVersion See
above + * @param collator CollationKey generator + */ + public ICUCollationKeyAnalyzer(Version matchVersion, Collator collator) { + this.matchVersion = matchVersion; this.collator = collator; + this.factory = new ICUCollationAttributeFactory(collator); } - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new KeywordTokenizer(reader); - result = new ICUCollationKeyFilter(result, collator); - return result; + /** + * @deprecated Use {@link ICUCollationKeyAnalyzer#ICUCollationKeyAnalyzer(Version, Collator)} + * and specify a version instead. This ctor will be removed in Lucene 5.0 + */ + @Deprecated + public ICUCollationKeyAnalyzer(Collator collator) { + this(Version.LUCENE_31, collator); } - - private class SavedStreams { - Tokenizer source; - TokenStream result; - } - + @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) - throws IOException { - - SavedStreams streams = (SavedStreams)getPreviousTokenStream(); - if (streams == null) { - streams = new SavedStreams(); - streams.source = new KeywordTokenizer(reader); - streams.result = new ICUCollationKeyFilter(streams.source, collator); - setPreviousTokenStream(streams); + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + if (matchVersion.onOrAfter(Version.LUCENE_40)) { + KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE); + return new TokenStreamComponents(tokenizer, tokenizer); } else { - streams.source.reset(reader); + KeywordTokenizer tokenizer = new KeywordTokenizer(reader); + return new TokenStreamComponents(tokenizer, new ICUCollationKeyFilter(tokenizer, collator)); } - return streams.result; } } Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java 
(revision 1075074) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (working copy) @@ -68,7 +68,10 @@ * generation timing and key length comparisons between ICU4J and * java.text.Collator over several languages. *

+ * @deprecated Use {@link ICUCollationAttributeFactory} instead, which encodes + * terms directly as bytes. This filter will be removed in Lucene 5.0 */ +@Deprecated public final class ICUCollationKeyFilter extends TokenFilter { private Collator collator = null; private RawCollationKey reusableKey = new RawCollationKey(); Index: modules/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java (revision 0) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java (revision 0) @@ -0,0 +1,50 @@ +package org.apache.lucene.collation.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.util.BytesRef; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.RawCollationKey; + +/** + * Extension of {@link CharTermAttributeImpl} that encodes the term + * text as a binary Unicode collation key instead of as UTF-8 bytes. 
+ */ +public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl { + private final Collator collator; + private final RawCollationKey key = new RawCollationKey(); + + /** + * Create a new ICUCollatedTermAttributeImpl + * @param collator Collation key generator + */ + public ICUCollatedTermAttributeImpl(Collator collator) { + this.collator = collator; + } + + @Override + public int toBytesRef(BytesRef target) { + collator.getRawCollationKey(toString(), key); + target.bytes = key.bytes; + target.offset = 0; + target.length = key.size; + return target.hashCode(); + } +} Property changes on: modules\analysis\icu\src\java\org\apache\lucene\collation\tokenattributes\ICUCollatedTermAttributeImpl.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/CHANGES.txt =================================================================== --- modules/analysis/CHANGES.txt (revision 1075074) +++ modules/analysis/CHANGES.txt (working copy) @@ -25,6 +25,10 @@ * LUCENE-1370: Added ShingleFilter option to output unigrams if no shingles can be generated. (Chris Harris via Steven Rowe) + * LUCENE-2514, LUCENE-2551: JDK and ICU CollationKeyAnalyzers were changed to + use pure byte keys when Version >= 4.0. This cuts sort key size approximately + in half. (Robert Muir) + New Features * LUCENE-2413: Consolidated Solr analysis components into common. 
Index: modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (revision 1075074) +++ modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (working copy) @@ -36,6 +36,7 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.document.Field; import org.apache.lucene.document.Document; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IndexableBinaryStringTools; import org.apache.lucene.util.LuceneTestCase; @@ -56,7 +57,9 @@ * @param keyBits the result from * collator.getCollationKey(original).toByteArray() * @return The encoded collation key for the original String + * @deprecated only for testing deprecated filters */ + @Deprecated protected String encodeCollationKey(byte[] keyBits) { // Ensure that the backing char[] array is large enough to hold the encoded // Binary String @@ -65,10 +68,10 @@ IndexableBinaryStringTools.encode(keyBits, 0, keyBits.length, encodedBegArray, 0, encodedLength); return new String(encodedBegArray); } - - public void testFarsiRangeFilterCollating(Analyzer analyzer, String firstBeg, - String firstEnd, String secondBeg, - String secondEnd) throws Exception { + + public void testFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, + BytesRef firstEnd, BytesRef secondBeg, + BytesRef secondEnd) throws Exception { RAMDirectory ramDir = new RAMDirectory(); IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); @@ -98,9 +101,9 @@ searcher.close(); } - public void testFarsiRangeQueryCollating(Analyzer analyzer, String firstBeg, - String firstEnd, String secondBeg, - String secondEnd) throws Exception { + public void testFarsiRangeQueryCollating(Analyzer analyzer, BytesRef firstBeg, + BytesRef firstEnd, BytesRef secondBeg, + BytesRef secondEnd) 
throws Exception { RAMDirectory ramDir = new RAMDirectory(); IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); @@ -126,8 +129,8 @@ searcher.close(); } - public void testFarsiTermRangeQuery(Analyzer analyzer, String firstBeg, - String firstEnd, String secondBeg, String secondEnd) throws Exception { + public void testFarsiTermRangeQuery(Analyzer analyzer, BytesRef firstBeg, + BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) throws Exception { RAMDirectory farsiIndex = new RAMDirectory(); IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig( Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (revision 1075074) +++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (working copy) @@ -19,6 +19,8 @@ import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.util.BytesRef; import java.text.Collator; import java.util.Locale; @@ -34,17 +36,19 @@ // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi // characters properly. 
private Collator collator = Collator.getInstance(new Locale("ar")); - private Analyzer analyzer = new CollationKeyAnalyzer(collator); + private Analyzer analyzer = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator); - private String firstRangeBeginning = encodeCollationKey - (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); - private String firstRangeEnd = encodeCollationKey - (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); - private String secondRangeBeginning = encodeCollationKey - (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); - private String secondRangeEnd = encodeCollationKey - (collator.getCollationKey(secondRangeEndOriginal).toByteArray()); + private BytesRef firstRangeBeginning = new BytesRef(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); + private BytesRef firstRangeEnd = new BytesRef(collator.getCollationKey(firstRangeEndOriginal).toByteArray()); + private BytesRef secondRangeBeginning = new BytesRef(collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); + private BytesRef secondRangeEnd = new BytesRef(collator.getCollationKey(secondRangeEndOriginal).toByteArray()); + @Override + public void setUp() throws Exception { + super.setUp(); + assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec())); + } + public void testFarsiRangeFilterCollating() throws Exception { testFarsiRangeFilterCollating (analyzer, firstRangeBeginning, firstRangeEnd, @@ -65,13 +69,13 @@ public void testCollationKeySort() throws Exception { Analyzer usAnalyzer - = new CollationKeyAnalyzer(Collator.getInstance(Locale.US)); + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US)); Analyzer franceAnalyzer - = new CollationKeyAnalyzer(Collator.getInstance(Locale.FRANCE)); + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE)); Analyzer swedenAnalyzer 
- = new CollationKeyAnalyzer(Collator.getInstance(new Locale("sv", "se"))); + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se"))); Analyzer denmarkAnalyzer - = new CollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk"))); + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk"))); // The ICU Collator and Sun java.text.Collator implementations differ in their // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US. Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (revision 1075074) +++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (working copy) @@ -21,12 +21,16 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.util.BytesRef; import java.text.Collator; import java.util.Locale; import java.io.Reader; - +/** + * @deprecated remove when CollationKeyFilter is removed. 
+ */ +@Deprecated public class TestCollationKeyFilter extends CollationTestBase { // the sort order of Ø versus U depends on the version of the rules being used // for the inherited root locale: Ø's order isnt specified in Locale.US since @@ -39,14 +43,14 @@ private Collator collator = Collator.getInstance(new Locale("ar")); private Analyzer analyzer = new TestAnalyzer(collator); - private String firstRangeBeginning = encodeCollationKey - (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); - private String firstRangeEnd = encodeCollationKey - (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); - private String secondRangeBeginning = encodeCollationKey - (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); - private String secondRangeEnd = encodeCollationKey - (collator.getCollationKey(secondRangeEndOriginal).toByteArray()); + private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey + (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray())); + private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey + (collator.getCollationKey(firstRangeEndOriginal).toByteArray())); + private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey + (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray())); + private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey + (collator.getCollationKey(secondRangeEndOriginal).toByteArray())); public final class TestAnalyzer extends Analyzer { Index: modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (revision 1075074) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (working copy) @@ -29,8 +29,8 @@ * Emits the entire input as a single token. 
*/ public final class KeywordTokenizer extends Tokenizer { - - private static final int DEFAULT_BUFFER_SIZE = 256; + /** Default read buffer size */ + public static final int DEFAULT_BUFFER_SIZE = 256; private boolean done = false; private int finalOffset; Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java (revision 0) @@ -0,0 +1,103 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.Collator; + +import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; + +/** + *

+ * Converts each token into its {@link java.text.CollationKey}, and then + * encodes the bytes as an index term. + *

+ *

+ * WARNING: Make sure you use exactly the same Collator at + * index and query time -- CollationKeys are only comparable when produced by + * the same Collator. Since {@link java.text.RuleBasedCollator}s are not + * independently versioned, it is unsafe to search against stored + * CollationKeys unless the following are exactly the same (best practice is + * to store this information with the index and check that they remain the + * same at query time): + *

+ *
    + *
  1. JVM vendor
  2. + *
  3. JVM version, including patch version
  4. + *
  5. + * The language (and country and variant, if specified) of the Locale + * used when constructing the collator via + * {@link Collator#getInstance(java.util.Locale)}. + *
  6. + *
  7. + * The collation strength used - see {@link Collator#setStrength(int)} + *
  8. + *
+ *

+ * The ICUCollationAttributeFactory in the icu package of Lucene's + * contrib area uses ICU4J's Collator, which makes its + * version available, thus allowing collation to be versioned independently + * from the JVM. ICUCollationAttributeFactory is also significantly faster and + * generates significantly shorter keys than CollationAttributeFactory. See + * http://site.icu-project.org/charts/collation-icu4j-sun for key + * generation timing and key length comparisons between ICU4J and + * java.text.Collator over several languages. + *

+ *

+ * CollationKeys generated by java.text.Collators are not compatible + * with those those generated by ICU Collators. Specifically, if you use + * CollationAttributeFactory to generate index terms, do not use + * ICUCollationAttributeFactory on the query side, or vice versa. + *

+ */ +public class CollationAttributeFactory extends AttributeSource.AttributeFactory { + private final Collator collator; + private final AttributeSource.AttributeFactory delegate; + + /** + * Create a CollationAttributeFactory, using + * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the + * factory for all other attributes. + * @param collator CollationKey generator + */ + public CollationAttributeFactory(Collator collator) { + this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator); + } + + /** + * Create a CollationAttributeFactory, using the supplied Attribute Factory + * as the factory for all other attributes. + * @param delegate Attribute Factory + * @param collator CollationKey generator + */ + public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) { + this.delegate = delegate; + this.collator = collator; + } + + @Override + public AttributeImpl createAttributeInstance( + Class attClass) { + return attClass.isAssignableFrom(CollatedTermAttributeImpl.class) + ? 
new CollatedTermAttributeImpl(collator) + : delegate.createAttributeInstance(attClass); + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\collation\CollationAttributeFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (revision 1075074) +++ modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (working copy) @@ -18,14 +18,13 @@ */ -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; +import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link +import org.apache.lucene.util.Version; import java.text.Collator; import java.io.Reader; -import java.io.IOException; /** *

@@ -33,8 +32,8 @@ *

*

* Converts the token into its {@link java.text.CollationKey}, and then - * encodes the CollationKey with - * {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow + * encodes the CollationKey either directly or with + * {@link IndexableBinaryStringTools} (see below), to allow * it to be stored as an index term. *

*

@@ -75,39 +74,49 @@ * CollationKeyAnalyzer to generate index terms, do not use * ICUCollationKeyAnalyzer on the query side, or vice versa. *

+ * + *

You must specify the required {@link Version} + * compatibility when creating CollationKeyAnalyzer: + *

*/ -public final class CollationKeyAnalyzer extends Analyzer { - private Collator collator; - - public CollationKeyAnalyzer(Collator collator) { +public final class CollationKeyAnalyzer extends ReusableAnalyzerBase { + private final Collator collator; + private final CollationAttributeFactory factory; + private final Version matchVersion; + + /** + * Create a new CollationKeyAnalyzer, using the specified collator. + * + * @param matchVersion See
above + * @param collator CollationKey generator + */ + public CollationKeyAnalyzer(Version matchVersion, Collator collator) { + this.matchVersion = matchVersion; this.collator = collator; + this.factory = new CollationAttributeFactory(collator); } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new KeywordTokenizer(reader); - result = new CollationKeyFilter(result, collator); - return result; - } - private class SavedStreams { - Tokenizer source; - TokenStream result; + /** + * @deprecated Use {@link CollationKeyAnalyzer#CollationKeyAnalyzer(Version, Collator)} + * and specify a version instead. This ctor will be removed in Lucene 5.0 + */ + @Deprecated + public CollationKeyAnalyzer(Collator collator) { + this(Version.LUCENE_31, collator); } - + @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) - throws IOException { - - SavedStreams streams = (SavedStreams)getPreviousTokenStream(); - if (streams == null) { - streams = new SavedStreams(); - streams.source = new KeywordTokenizer(reader); - streams.result = new CollationKeyFilter(streams.source, collator); - setPreviousTokenStream(streams); + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + if (matchVersion.onOrAfter(Version.LUCENE_40)) { + KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE); + return new TokenStreamComponents(tokenizer, tokenizer); } else { - streams.source.reset(reader); + KeywordTokenizer tokenizer = new KeywordTokenizer(reader); + return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator)); } - return streams.result; } } Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java (revision 1075074) +++ 
modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java (working copy) @@ -71,7 +71,10 @@ * CollationKeyFilter to generate index terms, do not use * ICUCollationKeyFilter on the query side, or vice versa. *

+ * @deprecated Use {@link CollationAttributeFactory} instead, which encodes + * terms directly as bytes. This filter will be removed in Lucene 5.0 */ +@Deprecated public final class CollationKeyFilter extends TokenFilter { private final Collator collator; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); Index: modules/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java (revision 0) @@ -0,0 +1,48 @@ +package org.apache.lucene.collation.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.Collator; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.util.BytesRef; + +/** + * Extension of {@link CharTermAttributeImpl} that encodes the term + * text as a binary Unicode collation key instead of as UTF-8 bytes. 
+ */ +public class CollatedTermAttributeImpl extends CharTermAttributeImpl { + private final Collator collator; + + /** + * Create a new CollatedTermAttributeImpl + * @param collator Collation key generator + */ + public CollatedTermAttributeImpl(Collator collator) { + this.collator = collator; + } + + @Override + public int toBytesRef(BytesRef target) { + target.bytes = collator.getCollationKey(toString()).toByteArray(); + target.offset = 0; + target.length = target.bytes.length; + return target.hashCode(); + } + +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\collation\tokenattributes\CollatedTermAttributeImpl.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -577,50 +577,6 @@ assertQueryEquals("[\\* TO \"*\"]",null,"[\\* TO \\*]"); } - public void testFarsiRangeCollating() throws Exception { - Directory ramDir = newDirectory(); - IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); - Document doc = new Document(); - doc.add(newField("content","\u0633\u0627\u0628", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - iw.addDocument(doc); - iw.close(); - IndexSearcher is = new IndexSearcher(ramDir, true); - - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. 
- Collator c = Collator.getInstance(new Locale("ar")); - qp.setRangeCollator(c); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is not - // supported). - - // Test ConstantScoreRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - // Test TermRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - is.close(); - ramDir.close(); - } - private String escapeDateString(String s) { if (s.indexOf(" ") > -1) { return "\"" + s + "\""; @@ -1260,4 +1216,41 @@ Query unexpanded = new TermQuery(new Term("field", "dogs")); assertEquals(unexpanded, smart.parse("\"dogs\"")); } + + /** + * Mock collation analyzer: indexes terms as "collated" + term + */ + private class MockCollationFilter extends TokenFilter { + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + protected MockCollationFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + String term = termAtt.toString(); + termAtt.setEmpty().append("collated").append(term); + 
return true; + } else { + return false; + } + } + + } + private class MockCollationAnalyzer extends Analyzer { + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); + } + } + + public void testCollatedRange() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCollationAnalyzer()); + qp.setAnalyzeRangeTerms(true); + Query expected = TermRangeQuery.newStringRange("field", "collatedabc", "collateddef", true, true); + Query actual = qp.parse("[abc TO def]"); + assertEquals(expected, actual); + } } Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -53,7 +53,7 @@ } public void testExclusive() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", false, false); + Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; @@ -74,7 +74,7 @@ } public void testInclusive() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", true, true); + Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexSearcher searcher = new IndexSearcher(dir, true); @@ -105,11 +105,11 @@ query = new TermRangeQuery("content", null, null, false, false); assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum); assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); - query = new TermRangeQuery("content", "", null, true, false); + query = 
TermRangeQuery.newStringRange("content", "", null, true, false); assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum); assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); // and now anothe one - query = new TermRangeQuery("content", "B", null, true, false); + query = TermRangeQuery.newStringRange("content", "B", null, true, false); assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum); assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length); searcher.close(); @@ -121,7 +121,7 @@ initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"}); IndexSearcher searcher = new IndexSearcher(dir, true); - TermRangeQuery query = new TermRangeQuery("content", "B", "J", true, true); + TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true); checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J"); final int savedClauseCount = BooleanQuery.getMaxClauseCount(); @@ -150,10 +150,10 @@ } public void testEqualsHashcode() { - Query query = new TermRangeQuery("content", "A", "C", true, true); + Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true); query.setBoost(1.0f); - Query other = new TermRangeQuery("content", "A", "C", true, true); + Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true); other.setBoost(1.0f); assertEquals("query equals itself is true", query, query); @@ -163,122 +163,34 @@ other.setBoost(2.0f); assertFalse("Different boost queries are not equal", query.equals(other)); - other = new TermRangeQuery("notcontent", "A", "C", true, true); + other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true); assertFalse("Different fields are not equal", query.equals(other)); - other = new TermRangeQuery("content", "X", "C", true, true); + other = TermRangeQuery.newStringRange("content", "X", "C", true, true); assertFalse("Different lower terms are not equal", query.equals(other)); - 
other = new TermRangeQuery("content", "A", "Z", true, true); + other = TermRangeQuery.newStringRange("content", "A", "Z", true, true); assertFalse("Different upper terms are not equal", query.equals(other)); - query = new TermRangeQuery("content", null, "C", true, true); - other = new TermRangeQuery("content", null, "C", true, true); + query = TermRangeQuery.newStringRange("content", null, "C", true, true); + other = TermRangeQuery.newStringRange("content", null, "C", true, true); assertEquals("equivalent queries with null lowerterms are equal()", query, other); assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode()); - query = new TermRangeQuery("content", "C", null, true, true); - other = new TermRangeQuery("content", "C", null, true, true); + query = TermRangeQuery.newStringRange("content", "C", null, true, true); + other = TermRangeQuery.newStringRange("content", "C", null, true, true); assertEquals("equivalent queries with null upperterms are equal()", query, other); assertEquals("hashcode returns same value", query.hashCode(), other.hashCode()); - query = new TermRangeQuery("content", null, "C", true, true); - other = new TermRangeQuery("content", "C", null, true, true); + query = TermRangeQuery.newStringRange("content", null, "C", true, true); + other = TermRangeQuery.newStringRange("content", "C", null, true, true); assertFalse("queries with different upper and lower terms are not equal", query.equals(other)); - query = new TermRangeQuery("content", "A", "C", false, false); - other = new TermRangeQuery("content", "A", "C", true, true); + query = TermRangeQuery.newStringRange("content", "A", "C", false, false); + other = TermRangeQuery.newStringRange("content", "A", "C", true, true); assertFalse("queries with different inclusive are not equal", query.equals(other)); - - query = new TermRangeQuery("content", "A", "C", false, false); - other = new TermRangeQuery("content", "A", "C", false, false, 
Collator.getInstance()); - assertFalse("a query with a collator is not equal to one without", query.equals(other)); } - public void testExclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH)); - initializeIndex(new String[] {"A", "B", "C", "D"}); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,C,D, only B in range", 1, hits.length); - searcher.close(); - - initializeIndex(new String[] {"A", "B", "D"}); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,D, only B in range", 1, hits.length); - searcher.close(); - - addDoc("C"); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("C added, still only B in range", 1, hits.length); - searcher.close(); - } - - public void testInclusiveCollating() throws Exception { - Query query = new TermRangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH)); - - initializeIndex(new String[]{"A", "B", "C", "D"}); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,C,D - A,B,C in range", 3, hits.length); - searcher.close(); - - initializeIndex(new String[]{"A", "B", "D"}); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("A,B,D - A and B in range", 2, hits.length); - searcher.close(); - - addDoc("C"); - searcher = new IndexSearcher(dir, true); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("C added - A, B, C in range", 3, hits.length); - searcher.close(); - } - - public void testFarsi() throws Exception { - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. 
However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator collator = Collator.getInstance(new Locale("ar")); - Query query = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator); - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a TermRangeQuery with a Farsi - // Collator (or an Arabic one for the case when Farsi is not supported). - initializeIndex(new String[]{ "\u0633\u0627\u0628"}); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, hits.length); - - query = new TermRangeQuery("content", "\u0633", "\u0638",true, true, collator); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, hits.length); - searcher.close(); - } - - public void testDanish() throws Exception { - Collator collator = Collator.getInstance(new Locale("da", "dk")); - // Danish collation orders the words below in the given order (example taken - // from TestSort.testInternationalSort() ). - String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; - Query query = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator); - - // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], - // but Danish collation does. 
- initializeIndex(words); - IndexSearcher searcher = new IndexSearcher(dir, true); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, hits.length); - - query = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator); - hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, hits.length); - searcher.close(); - } - private static class SingleCharAnalyzer extends Analyzer { private static class SingleCharTokenizer extends Tokenizer { @@ -363,7 +275,7 @@ public void testExclusiveLowerNull() throws Exception { Analyzer analyzer = new SingleCharAnalyzer(); //http://issues.apache.org/jira/browse/LUCENE-38 - Query query = new TermRangeQuery("content", null, "C", + Query query = TermRangeQuery.newStringRange("content", null, "C", false, false); initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer); IndexSearcher searcher = new IndexSearcher(dir, true); @@ -396,7 +308,7 @@ public void testInclusiveLowerNull() throws Exception { //http://issues.apache.org/jira/browse/LUCENE-38 Analyzer analyzer = new SingleCharAnalyzer(); - Query query = new TermRangeQuery("content", null, "C", true, true); + Query query = TermRangeQuery.newStringRange("content", null, "C", true, true); initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer); IndexSearcher searcher = new IndexSearcher(dir, true); int numHits = searcher.search(query, null, 1000).totalHits; Index: lucene/src/test/org/apache/lucene/search/TestSort.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSort.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -18,12 +18,8 @@ */ import java.io.IOException; -import java.text.Collator; import java.util.ArrayList; import java.util.BitSet; -import java.util.HashMap; -import java.util.Iterator; -import 
java.util.Locale; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -110,11 +106,6 @@ { "d", "m", null, null, null, null, null, null, null, null, null, null} }; - // the sort order of Ø versus U depends on the version of the rules being used - // for the inherited root locale: Ø's order isnt specified in Locale.US since - // its not used in english. - private boolean oStrokeFirst = Collator.getInstance(new Locale("")).compare("Ø", "U") < 0; - // create an index of all the documents, or just the x, or just the y documents private IndexSearcher getIndex (boolean even, boolean odd) throws IOException { @@ -564,12 +555,6 @@ sort.setSort (new SortField ("string", SortField.STRING, true) ); assertMatches (full, queryF, sort, "IJZ"); - sort.setSort (new SortField ("i18n", Locale.ENGLISH)); - assertMatches (full, queryF, sort, "ZJI"); - - sort.setSort (new SortField ("i18n", Locale.ENGLISH, true)); - assertMatches (full, queryF, sort, "IJZ"); - sort.setSort (new SortField ("int", SortField.INT) ); assertMatches (full, queryF, sort, "IZJ"); @@ -630,36 +615,6 @@ assertMatches (full, queryX, sort, "GICEA"); } - // test using a Locale for sorting strings - public void testLocaleSort() throws Exception { - sort.setSort (new SortField ("string", Locale.US) ); - assertMatches (full, queryX, sort, "AIGEC"); - assertMatches (full, queryY, sort, "DJHFB"); - - sort.setSort (new SortField ("string", Locale.US, true) ); - assertMatches (full, queryX, sort, "CEGIA"); - assertMatches (full, queryY, sort, "BFHJD"); - } - - // test using various international locales with accented characters - // (which sort differently depending on locale) - public void testInternationalSort() throws Exception { - sort.setSort (new SortField ("i18n", Locale.US)); - assertMatches (full, queryY, sort, oStrokeFirst ? 
"BFJHD" : "BFJDH"); - - sort.setSort (new SortField ("i18n", new Locale("sv", "se"))); - assertMatches (full, queryY, sort, "BJDFH"); - - sort.setSort (new SortField ("i18n", new Locale("da", "dk"))); - assertMatches (full, queryY, sort, "BJDHF"); - - sort.setSort (new SortField ("i18n", Locale.US)); - assertMatches (full, queryX, sort, "ECAGI"); - - sort.setSort (new SortField ("i18n", Locale.FRANCE)); - assertMatches (full, queryX, sort, "EACGI"); - } - // test a variety of sorts using a parallel multisearcher public void testParallelMultiSort() throws Exception { ExecutorService exec = Executors.newFixedThreadPool(_TestUtil.nextInt(random, 2, 8)); @@ -976,19 +931,6 @@ assertSaneFieldCaches(getName() + " various"); // next we'll check Locale based (String[]) for 'string', so purge first FieldCache.DEFAULT.purgeAllCaches(); - - sort.setSort(new SortField ("string", Locale.US) ); - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - sort.setSort(new SortField ("string", Locale.US, true) ); - assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - - sort.setSort(new SortField ("string", Locale.UK) ); - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - assertSaneFieldCaches(getName() + " Locale.US + Locale.UK"); - FieldCache.DEFAULT.purgeAllCaches(); - } private void assertMatches(IndexSearcher searcher, Query query, Sort sort, String expectedResult) throws IOException { @@ -1014,37 +956,6 @@ assertEquals (msg, expectedResult, buff.toString()); } - private HashMap getScores (ScoreDoc[] hits, IndexSearcher searcher) - throws IOException { - HashMap scoreMap = new HashMap(); - int n = hits.length; - for (int i=0; i void assertSameValues (HashMap m1, HashMap m2) { - int n = m1.size(); - int m = m2.size(); - assertEquals (n, m); - Iterator iter = m1.keySet().iterator(); - while (iter.hasNext()) { - K key = iter.next(); - V o1 = m1.get(key); - V o2 = m2.get(key); - if (o1 instanceof Float) { - assertEquals(((Float)o1).floatValue(), ((Float)o2).floatValue(), 1e-6); - } 
else { - assertEquals (m1.get(key), m2.get(key)); - } - } - } - public void testEmptyStringVsNullStringSort() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( Index: lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -92,26 +92,18 @@ /** macro for readability */ public static Query csrq(String f, String l, String h, boolean il, boolean ih) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih); query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); return query; } public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih); + TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih); query.setRewriteMethod(method); return query; } /** macro for readability */ - public static Query csrq(String f, String l, String h, boolean il, - boolean ih, Collator c) { - TermRangeQuery query = new TermRangeQuery(f, l, h, il, ih, c); - query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - return query; - } - - /** macro for readability */ public static Query cspq(Term prefix) { PrefixQuery query = new PrefixQuery(prefix); query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); @@ -142,15 +134,6 @@ } @Test - public void testBasicsRngCollating() throws IOException { - Collator c = Collator.getInstance(Locale.ENGLISH); - QueryUtils.check(csrq("data", "1", "6", T, T, c)); - QueryUtils.check(csrq("data", "A", "Z", T, T, c)); - QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", 
"A", - "Z", T, T, c)); - } - - @Test public void testEqualScores() throws IOException { // NOTE: uses index build in *this* setUp @@ -262,7 +245,7 @@ // first do a regular TermRangeQuery which uses term expansion so // docs with more terms in range get higher scores - Query rq = new TermRangeQuery("data", "1", "4", T, T); + Query rq = TermRangeQuery.newStringRange("data", "1", "4", T, T); ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs; int numHits = expected.length; @@ -416,92 +399,6 @@ } @Test - public void testRangeQueryIdCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - IndexReader reader = signedIndexReader; - IndexSearcher search = newSearcher(reader); - - int medId = ((maxId - minId) / 2); - - String minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test id, bounded on both ends - - result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs; - assertEquals("all but last", numDocs - 1, result.length); - - result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("all but first", numDocs - 1, result.length); - - result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("all but ends", numDocs - 2, result.length); - - result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("med and up", 1 + maxId - medId, result.length); - - result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("up to med", 1 + medId - minId, result.length); - - // unbounded id - - result = search.search(csrq("id", 
minIP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("min and up", numDocs, result.length); - - result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("max and down", numDocs, result.length); - - result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs; - assertEquals("not min, but up", numDocs - 1, result.length); - - result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("not max, but down", numDocs - 1, result.length); - - result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs; - assertEquals("med and up, not max", maxId - medId, result.length); - - result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("not min, up to med", medId - minId, result.length); - - // very small sets - - result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("med,med,F,F,c", 0, result.length); - result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - - result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs; - assertEquals("med,med,T,T,c", 1, result.length); - - 
search.close(); - } - - @Test public void testRangeQueryRand() throws IOException { // NOTE: uses index build in *super* setUp @@ -564,151 +461,4 @@ search.close(); } - - @Test - public void testRangeQueryRandCollating() throws IOException { - // NOTE: uses index build in *super* setUp - - // using the unsigned index because collation seems to ignore hyphens - IndexReader reader = unsignedIndexReader; - IndexSearcher search = newSearcher(reader); - - String minRP = pad(unsignedIndexDir.minR); - String maxRP = pad(unsignedIndexDir.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - ScoreDoc[] result; - - Collator c = Collator.getInstance(Locale.ENGLISH); - - // test extremes, bounded on both ends - - result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs; - assertEquals("find all", numDocs, result.length); - - result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs; - assertEquals("all but biggest", numDocs - 1, result.length); - - result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs; - assertEquals("all but smallest", numDocs - 1, result.length); - - result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("all but extremes", numDocs - 2, result.length); - - // unbounded - - result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("smallest and up", numDocs, result.length); - - result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs; - assertEquals("biggest and down", numDocs, result.length); - - result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs; - assertEquals("not smallest, but up", numDocs - 1, result.length); - - result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("not biggest, but down", numDocs - 1, result.length); - - // 
very small sets - - result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("min,min,F,F,c", 0, result.length); - result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs; - assertEquals("max,max,F,F,c", 0, result.length); - - result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs; - assertEquals("min,min,T,T,c", 1, result.length); - result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs; - assertEquals("nul,min,F,T,c", 1, result.length); - - result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs; - assertEquals("max,max,T,T,c", 1, result.length); - result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs; - assertEquals("max,nul,T,T,c", 1, result.length); - - search.close(); - } - - @Test - public void testFarsi() throws Exception { - - /* build an index */ - Directory farsiIndex = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true)); - Document doc = new Document(); - doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc - .add(newField("body", "body", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = newSearcher(reader); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. 
- Collator c = Collator.getInstance(new Locale("ar")); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not supported). - ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T, - c), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, - 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - search.close(); - reader.close(); - farsiIndex.close(); - } - - @Test - public void testDanish() throws Exception { - - /* build an index */ - Directory danishIndex = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex, new MockAnalyzer(MockTokenizer.SIMPLE, true)); - - // Danish collation orders the words below in the given order - // (example taken from TestSort.testInternationalSort() ). - String[] words = { "H\u00D8T", "H\u00C5T", "MAND" }; - for (int docnum = 0 ; docnum < words.length ; ++docnum) { - Document doc = new Document(); - doc.add(newField("content", words[docnum], - Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(newField("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - } - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = newSearcher(reader); - - Collator c = Collator.getInstance(new Locale("da", "dk")); - - // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], - // but Danish collation does. 
- ScoreDoc[] result = search.search - (csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - result = search.search - (csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - search.close(); - reader.close(); - danishIndex.close(); - } } Index: lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java (working copy) @@ -112,7 +112,7 @@ } private void checkDuplicateTerms(MultiTermQuery.RewriteMethod method) throws Exception { - final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true); + final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true); mtq.setRewriteMethod(method); final Query q1 = searcher.rewrite(mtq); final Query q2 = multiSearcher.rewrite(mtq); @@ -158,7 +158,7 @@ final MultiTermQuery mtq = new MultiTermQuery("data") { @Override protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { - return new TermRangeTermsEnum(terms.iterator(), "2", "7", true, true, null) { + return new TermRangeTermsEnum(terms.iterator(), new BytesRef("2"), new BytesRef("7"), true, true) { final BoostAttribute boostAtt = attributes().addAttribute(BoostAttribute.class); @@ -203,7 +203,7 @@ // default gets restored automatically by LuceneTestCase: BooleanQuery.setMaxClauseCount(3); - final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true); + final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true); mtq.setRewriteMethod(method); try { multiSearcherDupls.rewrite(mtq); @@ -219,7 +219,7 @@ // default gets restored automatically 
by LuceneTestCase: BooleanQuery.setMaxClauseCount(3); - final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true); + final MultiTermQuery mtq = TermRangeQuery.newStringRange("data", "2", "7", true, true); mtq.setRewriteMethod(method); multiSearcherDupls.rewrite(mtq); } Index: lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java (working copy) @@ -67,7 +67,7 @@ if (lower>upper) { int a=lower; lower=upper; upper=a; } - TermRangeQuery cq=new TermRangeQuery("asc", format.format(lower), format.format(upper), true, true); + TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true); NumericRangeQuery tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true); TopDocs trTopDocs = searcher.search(cq, 1); TopDocs nrTopDocs = searcher.search(tq, 1); Index: lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (working copy) @@ -35,7 +35,7 @@ public void testCSQ() throws Exception { final Query q1 = new ConstantScoreQuery(new TermQuery(new Term("a", "b"))); final Query q2 = new ConstantScoreQuery(new TermQuery(new Term("a", "c"))); - final Query q3 = new ConstantScoreQuery(new TermRangeFilter("a", "b", "c", true, true)); + final Query q3 = new ConstantScoreQuery(TermRangeFilter.newStringRange("a", "b", "c", true, true)); QueryUtils.check(q1); QueryUtils.check(q2); QueryUtils.checkEqual(q1,q1); Index: lucene/src/test/org/apache/lucene/search/TestDateFilter.java 
=================================================================== --- lucene/src/test/org/apache/lucene/search/TestDateFilter.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestDateFilter.java (working copy) @@ -61,12 +61,12 @@ // filter that should preserve matches // DateFilter df1 = DateFilter.Before("datefield", now); - TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), DateTools .timeToString(now, DateTools.Resolution.MILLISECOND), false, true); // filter that should discard matches // DateFilter df2 = DateFilter.Before("datefield", now - 999999); - TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(0, DateTools.Resolution.MILLISECOND), DateTools .timeToString(now - 2000, DateTools.Resolution.MILLISECOND), true, false); @@ -128,13 +128,13 @@ // filter that should preserve matches // DateFilter df1 = DateFilter.After("datefield", now); - TermRangeFilter df1 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df1 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(now, DateTools.Resolution.MILLISECOND), DateTools .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), true, false); // filter that should discard matches // DateFilter df2 = DateFilter.After("datefield", now + 999999); - TermRangeFilter df2 = new TermRangeFilter("datefield", DateTools + TermRangeFilter df2 = TermRangeFilter.newStringRange("datefield", DateTools .timeToString(now + 999999, DateTools.Resolution.MILLISECOND), DateTools.timeToString(now + 999999999, DateTools.Resolution.MILLISECOND), false, true); Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- 
lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -344,12 +344,10 @@ final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT); NumericUtils.intToPrefixCoded(lower, 0, lowerBytes); NumericUtils.intToPrefixCoded(upper, 0, upperBytes); - // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string! - final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString(); // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); - TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true); + TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -357,7 +355,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false); - cq=new TermRangeQuery(field, lowerString, upperString, false, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -365,7 +363,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true); - cq=new TermRangeQuery(field, lowerString, upperString, false, true); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = searcher.search(tq, 1); cTopDocs 
= searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -373,7 +371,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false); - cq=new TermRangeQuery(field, lowerString, upperString, true, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -361,12 +361,10 @@ final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); NumericUtils.longToPrefixCoded(lower, 0, lowerBytes); NumericUtils.longToPrefixCoded(upper, 0, upperBytes); - // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string! 
- final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString(); // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); - TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true); + TermRangeQuery cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -374,7 +372,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false); - cq=new TermRangeQuery(field, lowerString, upperString, false, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -382,7 +380,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true); - cq=new TermRangeQuery(field, lowerString, upperString, false, true); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -390,7 +388,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false); - cq=new TermRangeQuery(field, lowerString, upperString, true, false); + cq=new TermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = 
searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (working copy) @@ -187,7 +187,7 @@ * This tests FilteredQuery's rewrite correctness */ public void testRangeQuery() throws Exception { - TermRangeQuery rq = new TermRangeQuery( + TermRangeQuery rq = TermRangeQuery.newStringRange( "sorter", "b", "d", true, true); Query filteredquery = new FilteredQuery(rq, filter); Index: lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/search/TestTermRangeFilter.java (working copy) @@ -18,15 +18,9 @@ */ import java.io.IOException; -import java.text.Collator; -import java.util.Locale; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.store.Directory; import org.junit.Test; /** @@ -61,83 +55,83 @@ // test id, bounded on both ends - result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, T), numDocs).scoreDocs; assertEquals("find all", numDocs, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, T, F), numDocs).scoreDocs; assertEquals("all but last", numDocs - 1, result.length); - 
result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, T), numDocs).scoreDocs; assertEquals("all but first", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, maxIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, maxIP, F, F), numDocs).scoreDocs; assertEquals("all but ends", numDocs - 2, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, T), numDocs).scoreDocs; assertEquals("med and up", 1 + maxId - medId, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, medIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, T, T), numDocs).scoreDocs; assertEquals("up to med", 1 + medId - minId, result.length); // unbounded id - result = search.search(q, new TermRangeFilter("id", minIP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, T, F), numDocs).scoreDocs; assertEquals("min and up", numDocs, result.length); - result = search.search(q, new TermRangeFilter("id", null, maxIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, T), numDocs).scoreDocs; assertEquals("max and down", numDocs, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, null, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, null, F, F), numDocs).scoreDocs; assertEquals("not min, but up", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", null, maxIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", null, maxIP, F, F), numDocs).scoreDocs; assertEquals("not max, but down", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, maxIP, T, F), 
+ result = search.search(q, TermRangeFilter.newStringRange("id", medIP, maxIP, T, F), numDocs).scoreDocs; assertEquals("med and up, not max", maxId - medId, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, medIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, medIP, F, T), numDocs).scoreDocs; assertEquals("not min, up to med", medId - minId, result.length); // very small sets - result = search.search(q, new TermRangeFilter("id", minIP, minIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, F, F), numDocs).scoreDocs; assertEquals("min,min,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, medIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, F, F), numDocs).scoreDocs; assertEquals("med,med,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, F, F), numDocs).scoreDocs; assertEquals("max,max,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("id", minIP, minIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", minIP, minIP, T, T), numDocs).scoreDocs; assertEquals("min,min,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", null, minIP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("id", null, minIP, F, T), numDocs).scoreDocs; assertEquals("nul,min,F,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", maxIP, maxIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, maxIP, T, T), numDocs).scoreDocs; assertEquals("max,max,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", maxIP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("id", maxIP, null, T, F), numDocs).scoreDocs; 
assertEquals("max,nul,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("id", medIP, medIP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("id", medIP, medIP, T, T), numDocs).scoreDocs; assertEquals("med,med,T,T", 1, result.length); @@ -145,110 +139,6 @@ } @Test - public void testRangeFilterIdCollating() throws IOException { - - IndexReader reader = signedIndexReader; - IndexSearcher search = newSearcher(reader); - - Collator c = Collator.getInstance(Locale.ENGLISH); - - int medId = ((maxId - minId) / 2); - - String minIP = pad(minId); - String maxIP = pad(maxId); - String medIP = pad(medId); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - Query q = new TermQuery(new Term("body", "body")); - - // test id, bounded on both ends - int numHits = search.search(q, new TermRangeFilter("id", minIP, maxIP, T, - T, c), 1000).totalHits; - assertEquals("find all", numDocs, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, T, F, c), 1000).totalHits; - assertEquals("all but last", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, T, c), 1000).totalHits; - assertEquals("all but first", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, maxIP, F, F, c), 1000).totalHits; - assertEquals("all but ends", numDocs - 2, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, T, c), 1000).totalHits; - assertEquals("med and up", 1 + maxId - medId, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, T, T, c), 1000).totalHits; - assertEquals("up to med", 1 + medId - minId, numHits); - - // unbounded id - - numHits = search.search(q, new TermRangeFilter("id", minIP, null, T, F, c), - 1000).totalHits; - assertEquals("min and up", numDocs, numHits); - - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, T, 
c), - 1000).totalHits; - assertEquals("max and down", numDocs, numHits); - - numHits = search.search(q, new TermRangeFilter("id", minIP, null, F, F, c), - 1000).totalHits; - assertEquals("not min, but up", numDocs - 1, numHits); - - numHits = search.search(q, new TermRangeFilter("id", null, maxIP, F, F, c), - 1000).totalHits; - assertEquals("not max, but down", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", medIP, maxIP, T, F, c), 1000).totalHits; - assertEquals("med and up, not max", maxId - medId, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, medIP, F, T, c), 1000).totalHits; - assertEquals("not min, up to med", medId - minId, numHits); - - // very small sets - - numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, F, F, c), 1000).totalHits; - assertEquals("min,min,F,F", 0, numHits); - numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, F, F, c), 1000).totalHits; - assertEquals("med,med,F,F", 0, numHits); - numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, F, F, c), 1000).totalHits; - assertEquals("max,max,F,F", 0, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", minIP, minIP, T, T, c), 1000).totalHits; - assertEquals("min,min,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", null, minIP, F, T, c), - 1000).totalHits; - assertEquals("nul,min,F,T", 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", maxIP, maxIP, T, T, c), 1000).totalHits; - assertEquals("max,max,T,T", 1, numHits); - numHits = search.search(q, new TermRangeFilter("id", maxIP, null, T, F, c), - 1000).totalHits; - assertEquals("max,nul,T,T", 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("id", medIP, medIP, T, T, c), 1000).totalHits; - assertEquals("med,med,T,T", 1, numHits); - - search.close(); - } - - @Test public void testRangeFilterRand() throws IOException { IndexReader reader = signedIndexReader; 
@@ -266,223 +156,63 @@ // test extremes, bounded on both ends - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, T), numDocs).scoreDocs; assertEquals("find all", numDocs, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, T, F), numDocs).scoreDocs; assertEquals("all but biggest", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, T), numDocs).scoreDocs; assertEquals("all but smallest", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, maxRP, F, F), numDocs).scoreDocs; assertEquals("all but extremes", numDocs - 2, result.length); // unbounded - result = search.search(q, new TermRangeFilter("rand", minRP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, T, F), numDocs).scoreDocs; assertEquals("smallest and up", numDocs, result.length); - result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, T), numDocs).scoreDocs; assertEquals("biggest and down", numDocs, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, null, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, null, F, F), numDocs).scoreDocs; assertEquals("not smallest, but up", numDocs - 1, result.length); - result = search.search(q, new TermRangeFilter("rand", null, maxRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", null, maxRP, F, F), numDocs).scoreDocs; assertEquals("not biggest, but 
down", numDocs - 1, result.length); // very small sets - result = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, F, F), numDocs).scoreDocs; assertEquals("min,min,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, F, F), numDocs).scoreDocs; assertEquals("max,max,F,F", 0, result.length); - result = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", minRP, minRP, T, T), numDocs).scoreDocs; assertEquals("min,min,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("rand", null, minRP, F, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", null, minRP, F, T), numDocs).scoreDocs; assertEquals("nul,min,F,T", 1, result.length); - result = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, T, T), + result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, maxRP, T, T), numDocs).scoreDocs; assertEquals("max,max,T,T", 1, result.length); - result = search.search(q, new TermRangeFilter("rand", maxRP, null, T, F), + result = search.search(q, TermRangeFilter.newStringRange("rand", maxRP, null, T, F), numDocs).scoreDocs; assertEquals("max,nul,T,T", 1, result.length); search.close(); } - - @Test - public void testRangeFilterRandCollating() throws IOException { - - // using the unsigned index because collation seems to ignore hyphens - IndexReader reader = unsignedIndexReader; - IndexSearcher search = newSearcher(reader); - - Collator c = Collator.getInstance(Locale.ENGLISH); - - String minRP = pad(unsignedIndexDir.minR); - String maxRP = pad(unsignedIndexDir.maxR); - - int numDocs = reader.numDocs(); - - assertEquals("num of docs", numDocs, 1 + maxId - minId); - - Query q = new TermQuery(new Term("body", "body")); 
- - // test extremes, bounded on both ends - - int numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, - T, c), 1000).totalHits; - assertEquals("find all", numDocs, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, T, F, - c), 1000).totalHits; - assertEquals("all but biggest", numDocs - 1, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, T, - c), 1000).totalHits; - assertEquals("all but smallest", numDocs - 1, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, maxRP, F, F, - c), 1000).totalHits; - assertEquals("all but extremes", numDocs - 2, numHits); - - // unbounded - - numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, T, F, c), 1000).totalHits; - assertEquals("smallest and up", numDocs, numHits); - - numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, T, c), 1000).totalHits; - assertEquals("biggest and down", numDocs, numHits); - - numHits = search.search(q, - new TermRangeFilter("rand", minRP, null, F, F, c), 1000).totalHits; - assertEquals("not smallest, but up", numDocs - 1, numHits); - - numHits = search.search(q, - new TermRangeFilter("rand", null, maxRP, F, F, c), 1000).totalHits; - assertEquals("not biggest, but down", numDocs - 1, numHits); - - // very small sets - - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, F, F, - c), 1000).totalHits; - assertEquals("min,min,F,F", 0, numHits); - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, F, F, - c), 1000).totalHits; - assertEquals("max,max,F,F", 0, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", minRP, minRP, T, T, - c), 1000).totalHits; - assertEquals("min,min,T,T", 1, numHits); - numHits = search.search(q, - new TermRangeFilter("rand", null, minRP, F, T, c), 1000).totalHits; - assertEquals("nul,min,F,T", 1, numHits); - - numHits = search.search(q, new TermRangeFilter("rand", maxRP, maxRP, 
T, T, - c), 1000).totalHits; - assertEquals("max,max,T,T", 1, numHits); - numHits = search.search(q, - new TermRangeFilter("rand", maxRP, null, T, F, c), 1000).totalHits; - assertEquals("max,nul,T,T", 1, numHits); - - search.close(); - } - - @Test - public void testFarsi() throws Exception { - - /* build an index */ - Directory farsiIndex = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, farsiIndex); - Document doc = new Document(); - doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc - .add(newField("body", "body", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = newSearcher(reader); - Query q = new TermQuery(new Term("body", "body")); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi - // characters properly. - Collator collator = Collator.getInstance(new Locale("ar")); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the single - // index Term below should NOT be returned by a TermRangeFilter with a Farsi - // Collator (or an Arabic one for the case when Farsi is not supported). 
- int numHits = search.search(q, new TermRangeFilter("content", "\u062F", - "\u0698", T, T, collator), 1000).totalHits; - assertEquals("The index Term should not be included.", 0, numHits); - - numHits = search.search(q, new TermRangeFilter("content", "\u0633", - "\u0638", T, T, collator), 1000).totalHits; - assertEquals("The index Term should be included.", 1, numHits); - search.close(); - reader.close(); - farsiIndex.close(); - } - - @Test - public void testDanish() throws Exception { - - /* build an index */ - Directory danishIndex = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, danishIndex); - // Danish collation orders the words below in the given order - // (example taken from TestSort.testInternationalSort() ). - String[] words = {"H\u00D8T", "H\u00C5T", "MAND"}; - for (int docnum = 0; docnum < words.length; ++docnum) { - Document doc = new Document(); - doc.add(newField("content", words[docnum], Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc.add(newField("body", "body", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - writer.addDocument(doc); - } - IndexReader reader = writer.getReader(); - writer.close(); - - IndexSearcher search = newSearcher(reader); - Query q = new TermQuery(new Term("body", "body")); - - Collator collator = Collator.getInstance(new Locale("da", "dk")); - - // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ], - // but Danish collation does. 
- int numHits = search.search(q, new TermRangeFilter("content", "H\u00D8T", - "MAND", F, F, collator), 1000).totalHits; - assertEquals("The index Term should be included.", 1, numHits); - - numHits = search.search(q, new TermRangeFilter("content", "H\u00C5T", - "MAND", F, F, collator), 1000).totalHits; - assertEquals("The index Term should not be included.", 0, numHits); - search.close(); - reader.close(); - danishIndex.close(); - } } Index: lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (revision 1075074) +++ lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (working copy) @@ -17,6 +17,10 @@ * limitations under the License. */ +/** + * @deprecated Remove when IndexableBinaryStringTools is removed. + */ +@Deprecated public class TestIndexableBinaryStringTools extends LuceneTestCase { private static final int NUM_RANDOM_TESTS = 2000 * RANDOM_MULTIPLIER; private static final int MAX_RANDOM_BINARY_LENGTH = 300 * RANDOM_MULTIPLIER; Index: lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java =================================================================== --- lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (working copy) @@ -78,9 +78,9 @@ // maps field names to date resolutions Map fieldToDateResolution = null; - // The collator to use when determining range inclusion, - // for use when constructing RangeQuerys. 
- Collator rangeCollator = null; + //Whether or not to analyze range terms when constructing RangeQuerys + // (For example, analyzing terms into collation keys for locale-sensitive RangeQuery) + boolean analyzeRangeTerms = false; boolean autoGeneratePhraseQueries; @@ -391,27 +391,21 @@ } /** - * Sets the collator used to determine index term inclusion in ranges - * for RangeQuerys. - *

- * WARNING: Setting the rangeCollator to a non-null - * collator using this method will cause every single index Term in the - * Field referenced by lowerTerm and/or upperTerm to be examined. - * Depending on the number of index Terms in this Field, the operation could - * be very slow. - * - * @param rc the collator to use when constructing RangeQuerys + * Set whether or not to analyze range terms when constructing RangeQuerys. + * For example, setting this to true can enable analyzing terms into + * collation keys for locale-sensitive RangeQuery. + * + * @param analyzeRangeTerms whether or not terms should be analyzed for RangeQuerys */ - public void setRangeCollator(Collator rc) { - rangeCollator = rc; + public void setAnalyzeRangeTerms(boolean analyzeRangeTerms) { + this.analyzeRangeTerms = analyzeRangeTerms; } /** - * @return the collator used to determine index term inclusion in ranges - * for RangeQuerys. + * @return whether or not to analyze range terms when constructing RangeQuerys. 
*/ - public Collator getRangeCollator() { - return rangeCollator; + public boolean getAnalyzeRangeTerms() { + return analyzeRangeTerms; } protected void addClause(List clauses, int conj, int mods, Query q) { @@ -792,6 +786,36 @@ return new FuzzyQuery(term,minimumSimilarity,prefixLength); } + private BytesRef analyzeRangePart(String field, String part) { + TokenStream source; + + try { + source = analyzer.reusableTokenStream(field, new StringReader(part)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(part)); + } + + BytesRef result = new BytesRef(); + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + + try { + if (!source.incrementToken()) + throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); + termAtt.toBytesRef(result); + if (source.incrementToken()) + throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part); + } catch (IOException e) { + throw new RuntimeException("error analyzing range part: " + part, e); + } + + try { + source.close(); + } catch (IOException ignored) {} + + return result; + } + /** * Builds a new TermRangeQuery instance * @param field Field @@ -802,7 +826,23 @@ * @return new TermRangeQuery instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive, rangeCollator); + final BytesRef start; + final BytesRef end; + + if (part1 == null) { + start = null; + } else { + start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1); + } + + if (part2 == null) { + end = null; + } else { + end = analyzeRangeTerms ? 
analyzeRangePart(field, part2) : new BytesRef(part2); + } + + final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive); + query.setRewriteMethod(multiTermRewriteMethod); return query; } Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (working copy) @@ -77,7 +77,7 @@ } // *** TermToBytesRefAttribute interface *** - public final int toBytesRef(BytesRef target) { + public int toBytesRef(BytesRef target) { return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, target); } Index: lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (working copy) @@ -20,9 +20,6 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.BytesRef; -import java.text.Collator; -import java.util.Locale; - /** * Expert: Collects sorted results from Searchable's and collates them. * The elements put into this queue must be of type FieldDoc. @@ -35,11 +32,6 @@ volatile SortField[] fields = null; - // used in the case where the fields are sorted by locale - // based strings - volatile Collator[] collators = null; - - /** * Creates a hit queue sorted by the given list of fields. * @param fields Fieldable names, in priority order (highest priority first). @@ -60,7 +52,6 @@ */ void setFields (SortField[] fields) { this.fields = fields; - this.collators = hasCollators (fields); } @@ -69,24 +60,6 @@ return fields; } - - /** Returns an array of collators, possibly null. 
The collators - * correspond to any SortFields which were given a specific locale. - * @param fields Array of sort fields. - * @return Array, possibly null. - */ - private Collator[] hasCollators (final SortField[] fields) { - if (fields == null) return null; - Collator[] ret = new Collator[fields.length]; - for (int i=0; ia is less relevant than b. * @param a ScoreDoc @@ -109,11 +82,9 @@ c = (s2 == null) ? 0 : -1; } else if (s2 == null) { c = 1; - } else if (fields[i].getLocale() == null) { - c = s1.compareTo(s2); } else { - c = collators[i].compare(s1.utf8ToString(), s2.utf8ToString()); - } + c = s1.compareTo(s2); + } } else { c = docA.fields[i].compareTo(docB.fields[i]); if (type == SortField.SCORE) { Index: lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.text.Collator; import java.util.Comparator; import org.apache.lucene.index.TermsEnum; @@ -33,11 +32,8 @@ */ public class TermRangeTermsEnum extends FilteredTermsEnum { - private Collator collator; - private String upperTermText; - private String lowerTermText; - private boolean includeLower; - private boolean includeUpper; + final private boolean includeLower; + final private boolean includeUpper; final private BytesRef lowerBytesRef; final private BytesRef upperBytesRef; private final Comparator termComp; @@ -53,79 +49,61 @@ * * @param tenum * TermsEnum to filter - * @param lowerTermText + * @param lowerTerm * The term text at the lower end of the range - * @param upperTermText + * @param upperTerm * The term text at the upper end of the range * @param includeLower * If true, the lowerTerm is included in the range. * @param includeUpper * If true, the upperTerm is included in the range. 
- * @param collator - * The collator to use to collate index Terms, to determine their - * membership in the range bounded by lowerTerm and - * upperTerm. * * @throws IOException */ - public TermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText, - boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm, + boolean includeLower, boolean includeUpper) throws IOException { super(tenum); - this.collator = collator; - this.upperTermText = upperTermText; - this.lowerTermText = lowerTermText; - this.includeLower = includeLower; - this.includeUpper = includeUpper; // do a little bit of normalization... // open ended range queries should always be inclusive. - if (this.lowerTermText == null) { - this.lowerTermText = ""; + if (lowerTerm == null) { + this.lowerBytesRef = new BytesRef(); this.includeLower = true; + } else { + this.lowerBytesRef = lowerTerm; + this.includeLower = includeLower; } - lowerBytesRef = new BytesRef(this.lowerTermText); - if (this.upperTermText == null) { + if (upperTerm == null) { this.includeUpper = true; upperBytesRef = null; } else { - upperBytesRef = new BytesRef(upperTermText); + this.includeUpper = includeUpper; + upperBytesRef = upperTerm; } - BytesRef startBytesRef = (collator == null) ? 
lowerBytesRef : new BytesRef(""); - setInitialSeekTerm(startBytesRef); + setInitialSeekTerm(lowerBytesRef); termComp = getComparator(); } @Override protected AcceptStatus accept(BytesRef term) { - if (collator == null) { - if (!this.includeLower && term.equals(lowerBytesRef)) - return AcceptStatus.NO; - // Use this field's default sort ordering - if (upperBytesRef != null) { - final int cmp = termComp.compare(upperBytesRef, term); - /* - * if beyond the upper term, or is exclusive and this is equal to - * the upper term, break out - */ - if ((cmp < 0) || - (!includeUpper && cmp==0)) { - return AcceptStatus.END; - } - } - return AcceptStatus.YES; - } else { - if ((includeLower - ? collator.compare(term.utf8ToString(), lowerTermText) >= 0 - : collator.compare(term.utf8ToString(), lowerTermText) > 0) - && (upperTermText == null - || (includeUpper - ? collator.compare(term.utf8ToString(), upperTermText) <= 0 - : collator.compare(term.utf8ToString(), upperTermText) < 0))) { - return AcceptStatus.YES; - } + if (!this.includeLower && term.equals(lowerBytesRef)) return AcceptStatus.NO; + + // Use this field's default sort ordering + if (upperBytesRef != null) { + final int cmp = termComp.compare(upperBytesRef, term); + /* + * if beyond the upper term, or is exclusive and this is equal to + * the upper term, break out + */ + if ((cmp < 0) || + (!includeUpper && cmp==0)) { + return AcceptStatus.END; + } } + + return AcceptStatus.YES; } } Index: lucene/src/java/org/apache/lucene/search/TermRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.search; +import org.apache.lucene.util.BytesRef; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -17,15 +19,13 @@ * limitations under the License. */ -import java.text.Collator; - /** * A Filter that restricts search results to a range of term * values in a given field. * *

This filter matches the documents looking for terms that fall into the * supplied range according to {@link - * String#compareTo(String)}, unless a Collator is provided. It is not intended + * Byte#compareTo(Byte)}, It is not intended * for numerical ranges; use {@link NumericRangeFilter} instead. * *

If you construct a large number of range filters with different ranges but on the @@ -44,39 +44,25 @@ * lowerTerm is null and includeLower is true (similar for upperTerm * and includeUpper) */ - public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm, + public TermRangeFilter(String fieldName, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) { super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper)); } /** - * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The lower bound on this range - * @param upperTerm The upper bound on this range - * @param includeLower Does this range include the lower bound? - * @param includeUpper Does this range include the upper bound? - * @param collator The collator to use when determining range inclusion; set - * to null to use Unicode code point ordering instead of collation. - * @throws IllegalArgumentException if both terms are null or if - * lowerTerm is null and includeLower is true (similar for upperTerm - * and includeUpper) + * Factory that creates a new TermRangeFilter using Strings for term text. */ - public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm, - boolean includeLower, boolean includeUpper, - Collator collator) { - super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); + public static TermRangeFilter newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm); + BytesRef upper = upperTerm == null ? 
null : new BytesRef(upperTerm); + return new TermRangeFilter(field, lower, upper, includeLower, includeUpper); } - + /** * Constructs a filter for field fieldName matching * less than or equal to upperTerm. */ - public static TermRangeFilter Less(String fieldName, String upperTerm) { + public static TermRangeFilter Less(String fieldName, BytesRef upperTerm) { return new TermRangeFilter(fieldName, null, upperTerm, false, true); } @@ -84,22 +70,19 @@ * Constructs a filter for field fieldName matching * greater than or equal to lowerTerm. */ - public static TermRangeFilter More(String fieldName, String lowerTerm) { + public static TermRangeFilter More(String fieldName, BytesRef lowerTerm) { return new TermRangeFilter(fieldName, lowerTerm, null, true, false); } /** Returns the lower value of this range filter */ - public String getLowerTerm() { return query.getLowerTerm(); } + public BytesRef getLowerTerm() { return query.getLowerTerm(); } /** Returns the upper value of this range filter */ - public String getUpperTerm() { return query.getUpperTerm(); } + public BytesRef getUpperTerm() { return query.getUpperTerm(); } /** Returns true if the lower endpoint is inclusive */ public boolean includesLower() { return query.includesLower(); } /** Returns true if the upper endpoint is inclusive */ public boolean includesUpper() { return query.includesUpper(); } - - /** Returns the collator used to determine range inclusion, if any. 
*/ - public Collator getCollator() { return query.getCollator(); } } Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -18,11 +18,11 @@ */ import java.io.IOException; -import java.text.Collator; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; /** @@ -30,7 +30,7 @@ * *

This query matches the documents looking for terms that fall into the * supplied range according to {@link - * String#compareTo(String)}, unless a Collator is provided. It is not intended + * Byte#compareTo(Byte)}. It is not intended * for numerical ranges; use {@link NumericRangeQuery} instead. * *

This query uses the {@link @@ -40,9 +40,8 @@ */ public class TermRangeQuery extends MultiTermQuery { - private String lowerTerm; - private String upperTerm; - private Collator collator; + private BytesRef lowerTerm; + private BytesRef upperTerm; private boolean includeLower; private boolean includeUpper; @@ -69,78 +68,48 @@ * If true, the upperTerm is * included in the range. */ - public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { - this(field, lowerTerm, upperTerm, includeLower, includeUpper, null); - } - - /** Constructs a query selecting all terms greater/equal than - * lowerTerm but less/equal than upperTerm. - *

- * If an endpoint is null, it is said - * to be "open". Either or both endpoints may be open. Open endpoints may not - * be exclusive (you can't select all but the first or last term without - * explicitly specifying the term to exclude.) - *

- * If collator is not null, it will be used to decide whether - * index terms are within the given range, rather than using the Unicode code - * point order in which index terms are stored. - *

- * WARNING: Using this constructor and supplying a non-null - * value in the collator parameter will cause every single - * index Term in the Field referenced by lowerTerm and/or upperTerm to be - * examined. Depending on the number of index Terms in this Field, the - * operation could be very slow. - * - * @param lowerTerm The Term text at the lower end of the range - * @param upperTerm The Term text at the upper end of the range - * @param includeLower - * If true, the lowerTerm is - * included in the range. - * @param includeUpper - * If true, the upperTerm is - * included in the range. - * @param collator The collator to use to collate index Terms, to determine - * their membership in the range bounded by lowerTerm and - * upperTerm. - */ - public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, - Collator collator) { + public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) { super(field); this.lowerTerm = lowerTerm; this.upperTerm = upperTerm; this.includeLower = includeLower; this.includeUpper = includeUpper; - this.collator = collator; } + /** + * Factory that creates a new TermRangeQuery using Strings for term text. + */ + public static TermRangeQuery newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm); + BytesRef upper = upperTerm == null ? 
null : new BytesRef(upperTerm); + return new TermRangeQuery(field, lower, upper, includeLower, includeUpper); + } + /** Returns the lower value of this range query */ - public String getLowerTerm() { return lowerTerm; } + public BytesRef getLowerTerm() { return lowerTerm; } /** Returns the upper value of this range query */ - public String getUpperTerm() { return upperTerm; } + public BytesRef getUpperTerm() { return upperTerm; } /** Returns true if the lower endpoint is inclusive */ public boolean includesLower() { return includeLower; } /** Returns true if the upper endpoint is inclusive */ public boolean includesUpper() { return includeUpper; } - - /** Returns the collator used to determine range inclusion, if any. */ - public Collator getCollator() { return collator; } @Override protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { - if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { + if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { return TermsEnum.EMPTY; } TermsEnum tenum = terms.iterator(); - if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) { + if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) { return tenum; } return new TermRangeTermsEnum(tenum, - lowerTerm, upperTerm, includeLower, includeUpper, collator); + lowerTerm, upperTerm, includeLower, includeUpper); } /** Prints a user-readable version of this query. */ @@ -152,9 +121,10 @@ buffer.append(":"); } buffer.append(includeLower ? '[' : '{'); - buffer.append(lowerTerm != null ? ("*".equals(lowerTerm) ? "\\*" : lowerTerm) : "*"); + // TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8! + buffer.append(lowerTerm != null ? ("*".equals(lowerTerm.utf8ToString()) ? 
"\\*" : lowerTerm.utf8ToString()) : "*"); buffer.append(" TO "); - buffer.append(upperTerm != null ? ("*".equals(upperTerm) ? "\\*" : upperTerm) : "*"); + buffer.append(upperTerm != null ? ("*".equals(upperTerm.utf8ToString()) ? "\\*" : upperTerm.utf8ToString()) : "*"); buffer.append(includeUpper ? ']' : '}'); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); @@ -164,7 +134,6 @@ public int hashCode() { final int prime = 31; int result = super.hashCode(); - result = prime * result + ((collator == null) ? 0 : collator.hashCode()); result = prime * result + (includeLower ? 1231 : 1237); result = prime * result + (includeUpper ? 1231 : 1237); result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); @@ -181,11 +150,6 @@ if (getClass() != obj.getClass()) return false; TermRangeQuery other = (TermRangeQuery) obj; - if (collator == null) { - if (other.collator != null) - return false; - } else if (!collator.equals(other.collator)) - return false; if (includeLower != other.includeLower) return false; if (includeUpper != other.includeUpper) Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -18,8 +18,6 @@ */ import java.io.IOException; -import java.text.Collator; -import java.util.Locale; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldCache.DocTermsIndex; @@ -718,85 +716,6 @@ } } - - /** Sorts by a field's value using the Collator for a - * given Locale. - * - *

WARNING: this is likely very slow; you'll - * get much better performance using the - * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */ - public static final class StringComparatorLocale extends FieldComparator { - - private final String[] values; - private DocTerms currentDocTerms; - private final String field; - final Collator collator; - private String bottom; - private final BytesRef tempBR = new BytesRef(); - - StringComparatorLocale(int numHits, String field, Locale locale) { - values = new String[numHits]; - this.field = field; - collator = Collator.getInstance(locale); - } - - @Override - public int compare(int slot1, int slot2) { - final String val1 = values[slot1]; - final String val2 = values[slot2]; - if (val1 == null) { - if (val2 == null) { - return 0; - } - return -1; - } else if (val2 == null) { - return 1; - } - return collator.compare(val1, val2); - } - - @Override - public int compareBottom(int doc) { - final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString(); - if (bottom == null) { - if (val2 == null) { - return 0; - } - return -1; - } else if (val2 == null) { - return 1; - } - return collator.compare(bottom, val2); - } - - @Override - public void copy(int slot, int doc) { - final BytesRef br = currentDocTerms.getTerm(doc, tempBR); - if (br == null) { - values[slot] = null; - } else { - values[slot] = br.utf8ToString(); - } - } - - @Override - public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field); - return this; - } - - @Override - public void setBottom(final int bottom) { - this.bottom = values[bottom]; - } - - @Override - public Comparable value(int slot) { - final String s = values[slot]; - return s == null ? null : new BytesRef(values[slot]); - } - } - /** Sorts by field's natural Term sort order, using * ordinals. 
This is functionally equivalent to {@link * TermValComparator}, but it first resolves the string Index: lucene/src/java/org/apache/lucene/search/SortField.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SortField.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.util.Locale; import org.apache.lucene.search.cache.*; import org.apache.lucene.util.StringHelper; @@ -90,7 +89,6 @@ private String field; private int type; // defaults to determining type dynamically - private Locale locale; // defaults to "natural order" (no Locale) boolean reverse = false; // defaults to natural order private CachedArrayCreator creator; public Object missingValue = null; // used for 'sortMissingFirst/Last' @@ -213,29 +211,7 @@ } return this; } - - /** Creates a sort by terms in the given field sorted - * according to the given locale. - * @param field Name of field to sort by, cannot be null. - * @param locale Locale of values in the field. - */ - public SortField (String field, Locale locale) { - initFieldType(field, STRING); - this.locale = locale; - } - - /** Creates a sort, possibly in reverse, by terms in the given field sorted - * according to the given locale. - * @param field Name of field to sort by, cannot be null. - * @param locale Locale of values in the field. - */ - public SortField (String field, Locale locale, boolean reverse) { - initFieldType(field, STRING); - this.locale = locale; - this.reverse = reverse; - } - /** Creates a sort with a custom comparison function. * @param field Name of field to sort by; cannot be null. * @param comparator Returns a comparator for sorting hits. @@ -295,14 +271,6 @@ return type; } - /** Returns the Locale by which term values are interpreted. - * May return null if no Locale was specified. - * @return Locale, or null. 
- */ - public Locale getLocale() { - return locale; - } - /** Returns the instance of a {@link FieldCache} parser that fits to the given sort type. * May return null if no parser was specified. Sorting is using the default parser then. * @return An instance of a {@link FieldCache} parser, or null. @@ -384,7 +352,6 @@ break; } - if (locale != null) buffer.append('(').append(locale).append(')'); if (creator != null) buffer.append('(').append(creator).append(')'); if (reverse) buffer.append('!'); @@ -404,7 +371,6 @@ other.field == this.field // field is always interned && other.type == this.type && other.reverse == this.reverse - && (other.locale == null ? this.locale == null : other.locale.equals(this.locale)) && (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource)) && (other.creator == null ? this.creator == null : other.creator.equals(this.creator)) ); @@ -419,7 +385,6 @@ public int hashCode() { int hash=type^0x346565dd + Boolean.valueOf(reverse).hashCode()^0xaf5998bb; if (field != null) hash += field.hashCode()^0xff5685dd; - if (locale != null) hash += locale.hashCode()^0x08150815; if (comparatorSource != null) hash += comparatorSource.hashCode(); if (creator != null) hash += creator.hashCode()^0x3aaf56ff; return hash; @@ -439,13 +404,6 @@ */ public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException { - if (locale != null) { - // TODO: it'd be nice to allow FieldCache.getStringIndex - // to optionally accept a Locale so sorting could then use - // the faster StringComparator impls - return new FieldComparator.StringComparatorLocale(numHits, field, locale); - } - switch (type) { case SortField.SCORE: return new FieldComparator.RelevanceComparator(numHits); Index: lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java =================================================================== --- 
lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (revision 1075074) +++ lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (working copy) @@ -39,7 +39,10 @@ *

* * @lucene.experimental + * @deprecated Implement {@link TermToBytesRefAttribute} and store bytes directly + * instead. This class will be removed in Lucene 5.0 */ +@Deprecated public final class IndexableBinaryStringTools { private static final CodingCase[] CODING_CASES = { Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 1075074) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy) @@ -642,55 +642,6 @@ "gack (bar blar {a TO z})"); } - public void testFarsiRangeCollating() throws Exception { - Directory ramDir = newDirectory(); - IndexWriter iw = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); - Document doc = new Document(); - doc.add(newField("content", "\u0633\u0627\u0628", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - iw.addDocument(doc); - iw.close(); - IndexSearcher is = new IndexSearcher(ramDir, true); - - StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - - // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in - // RuleBasedCollator. However, the Arabic Locale seems to order the - // Farsi - // characters properly. - Collator c = Collator.getInstance(new Locale("ar")); - qp.setRangeCollator(c); - - // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi - // orders the U+0698 character before the U+0633 character, so the - // single - // index Term below should NOT be returned by a ConstantScoreRangeQuery - // with a Farsi Collator (or an Arabic one for the case when Farsi is - // not - // supported). 
- - // Test ConstantScoreRangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"), - null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - // Test RangeQuery - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - result = is.search(qp.parse("[ \u062F TO \u0698 ]", "content"), null, 1000).scoreDocs; - assertEquals("The index Term should not be included.", 0, result.length); - - result = is.search(qp.parse("[ \u0633 TO \u0638 ]", "content"), null, 1000).scoreDocs; - assertEquals("The index Term should be included.", 1, result.length); - - is.close(); - ramDir.close(); - } - /** for testing DateTools support */ private String getDate(String s, DateTools.Resolution resolution) throws Exception { Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java (revision 1075074) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java (working copy) @@ -60,8 +60,6 @@ Collections.singletonMap(MultiTermRewriteMethodAttribute.class.getName()+"#multiTermRewriteMethod", MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT)); _TestUtil.assertAttributeReflection(new PositionIncrementsAttributeImpl(), Collections.singletonMap(PositionIncrementsAttribute.class.getName()+"#positionIncrementsEnabled", false)); - _TestUtil.assertAttributeReflection(new RangeCollatorAttributeImpl(), - Collections.singletonMap(RangeCollatorAttribute.class.getName()+"#rangeCollator", null)); } } Index: 
lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/ParametricRangeQueryNodeProcessor.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. */ -import java.text.Collator; import java.text.DateFormat; import java.util.Calendar; import java.util.Date; @@ -36,7 +35,6 @@ import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryParser.standard.config.DateResolutionAttribute; import org.apache.lucene.queryParser.standard.config.LocaleAttribute; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode; /** @@ -54,12 +52,7 @@ * If a {@link DateResolutionAttribute} is defined and the {@link Resolution} is * not null it will also be used to parse the date value.
*
- * This processor will also try to retrieve a {@link RangeCollatorAttribute} - * from the {@link QueryConfigHandler}. If a {@link RangeCollatorAttribute} is - * found and the {@link Collator} is not null, it's set on the - * {@link RangeQueryNode}.
* - * @see RangeCollatorAttribute * @see DateResolutionAttribute * @see LocaleAttribute * @see RangeQueryNode @@ -79,17 +72,9 @@ ParametricQueryNode upper = parametricRangeNode.getUpperBound(); ParametricQueryNode lower = parametricRangeNode.getLowerBound(); Locale locale = Locale.getDefault(); - Collator collator = null; DateTools.Resolution dateRes = null; boolean inclusive = false; - if (getQueryConfigHandler().hasAttribute(RangeCollatorAttribute.class)) { - - collator = getQueryConfigHandler().getAttribute( - RangeCollatorAttribute.class).getRangeCollator(); - - } - if (getQueryConfigHandler().hasAttribute(LocaleAttribute.class)) { locale = getQueryConfigHandler().getAttribute(LocaleAttribute.class) @@ -155,7 +140,7 @@ lower.setText(part1); upper.setText(part2); - return new RangeQueryNode(lower, upper, collator); + return new RangeQueryNode(lower, upper); } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/StandardQueryParser.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. 
*/ -import java.text.Collator; import java.util.Locale; import java.util.Map; import java.util.TooManyListenersException; @@ -41,10 +40,8 @@ import org.apache.lucene.queryParser.standard.config.MultiFieldAttribute; import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute; import org.apache.lucene.queryParser.standard.config.PositionIncrementsAttribute; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.config.StandardQueryConfigHandler; import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator; -import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode; import org.apache.lucene.queryParser.standard.parser.StandardSyntaxParser; import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline; import org.apache.lucene.search.FuzzyQuery; @@ -188,32 +185,6 @@ } /** - * Sets the collator used to determine index term inclusion in ranges for - * RangeQuerys. - *

- * WARNING: Setting the rangeCollator to a non-null collator - * using this method will cause every single index Term in the Field - * referenced by lowerTerm and/or upperTerm to be examined. Depending on the - * number of index Terms in this Field, the operation could be very slow. - * - * @param collator - * the collator to use when constructing {@link RangeQueryNode}s - */ - public void setRangeCollator(Collator collator) { - RangeCollatorAttribute attr = getQueryConfigHandler().getAttribute(RangeCollatorAttribute.class); - attr.setDateResolution(collator); - } - - /** - * @return the collator used to determine index term inclusion in ranges for - * RangeQuerys. - */ - public Collator getRangeCollator() { - RangeCollatorAttribute attr = getQueryConfigHandler().getAttribute(RangeCollatorAttribute.class); - return attr.getRangeCollator(); - } - - /** * Sets the boolean operator of the QueryParser. In default mode ( * {@link Operator#OR}) terms without any modifiers are considered optional: * for example capital of Hungary is equal to Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (working copy) @@ -1,92 +0,0 @@ -package org.apache.lucene.queryParser.standard.config; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.Collator; - -import org.apache.lucene.queryParser.core.config.QueryConfigHandler; -import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.AttributeImpl; - -/** - * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor - * and must be defined in the {@link QueryConfigHandler}. This attribute tells - * the processor which {@link Collator} should be used for a - * {@link TermRangeQuery}
- * - * @see org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute - */ -public class RangeCollatorAttributeImpl extends AttributeImpl - implements RangeCollatorAttribute { - - private Collator rangeCollator; - - public RangeCollatorAttributeImpl() { - rangeCollator = null; // default value for 2.4 - } - - public void setDateResolution(Collator rangeCollator) { - this.rangeCollator = rangeCollator; - } - - public Collator getRangeCollator() { - return this.rangeCollator; - } - - @Override - public void clear() { - throw new UnsupportedOperationException(); - } - - @Override - public void copyTo(AttributeImpl target) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean equals(Object other) { - - if (other instanceof RangeCollatorAttributeImpl) { - RangeCollatorAttributeImpl rangeCollatorAttr = (RangeCollatorAttributeImpl) other; - - if (rangeCollatorAttr.rangeCollator == this.rangeCollator - || rangeCollatorAttr.rangeCollator.equals(this.rangeCollator)) { - - return true; - - } - - } - - return false; - - } - - @Override - public int hashCode() { - return (this.rangeCollator == null) ? 0 : this.rangeCollator.hashCode(); - } - - @Override - public String toString() { - return ""; - } - -} Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttribute.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttribute.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttribute.java (working copy) @@ -1,37 +0,0 @@ -package org.apache.lucene.queryParser.standard.config; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.text.Collator; - -import org.apache.lucene.queryParser.core.config.QueryConfigHandler; -import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.Attribute; - -/** - * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor - * and must be defined in the {@link QueryConfigHandler}. This attribute tells - * the processor which {@link Collator} should be used for a - * {@link TermRangeQuery}
- * - */ -public interface RangeCollatorAttribute extends Attribute { - public void setDateResolution(Collator rangeCollator); - public Collator getRangeCollator(); -} Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/StandardQueryConfigHandler.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/StandardQueryConfigHandler.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/StandardQueryConfigHandler.java (working copy) @@ -38,7 +38,6 @@ addFieldConfigListener(new FieldDateResolutionFCListener(this)); // Default Values - addAttribute(RangeCollatorAttribute.class); addAttribute(DefaultOperatorAttribute.class); addAttribute(AnalyzerAttribute.class); addAttribute(FuzzyAttribute.class); Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RangeQueryNode.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RangeQueryNode.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RangeQueryNode.java (working copy) @@ -17,34 +17,24 @@ * limitations under the License. */ -import java.text.Collator; - import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; -import org.apache.lucene.queryParser.standard.config.RangeCollatorAttribute; import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor; /** - * This query node represents a range query. It also holds which collator will - * be used by the range query and if the constant score rewrite is enabled.
+ * This query node represents a range query. * * @see ParametricRangeQueryNodeProcessor - * @see RangeCollatorAttribute * @see org.apache.lucene.search.TermRangeQuery */ public class RangeQueryNode extends ParametricRangeQueryNode { - private Collator collator; - /** * @param lower * @param upper */ - public RangeQueryNode(ParametricQueryNode lower, ParametricQueryNode upper, Collator collator) { + public RangeQueryNode(ParametricQueryNode lower, ParametricQueryNode upper) { super(lower, upper); - - this.collator = collator; - } @Override @@ -57,12 +47,4 @@ return sb.toString(); } - - /** - * @return the collator - */ - public Collator getCollator() { - return this.collator; - } - } Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (revision 1075074) +++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RangeQueryNodeBuilder.java (working copy) @@ -53,9 +53,7 @@ String field = rangeNode.getField().toString(); - TermRangeQuery rangeQuery = new TermRangeQuery(field, lower - .getTextAsString(), upper.getTextAsString(), lowerInclusive, - upperInclusive, rangeNode.getCollator()); + TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, lower.getTextAsString(), upper.getTextAsString(), lowerInclusive, upperInclusive); MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod)queryNode.getTag(MultiTermRewriteMethodAttribute.TAG_ID); if (method != null) { Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (revision 1075074) +++ 
lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java (working copy) @@ -174,8 +174,7 @@ if (isPass2ResolvingPhrases) { // Must use old-style RangeQuery in order to produce a BooleanQuery // that can be turned into SpanOr clause - TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, startInclusive, endInclusive, - getRangeCollator()); + TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive); rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return rangeQuery; } Index: lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/RangeFilterBuilder.java =================================================================== --- lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/RangeFilterBuilder.java (revision 1075074) +++ lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/RangeFilterBuilder.java (working copy) @@ -41,7 +41,7 @@ String upperTerm=e.getAttribute("upperTerm"); boolean includeLower=DOMUtils.getAttribute(e,"includeLower",true); boolean includeUpper=DOMUtils.getAttribute(e,"includeUpper",true); - return new TermRangeFilter(fieldName,lowerTerm,upperTerm,includeLower,includeUpper); + return TermRangeFilter.newStringRange(fieldName,lowerTerm,upperTerm,includeLower,includeUpper); } } Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1075074) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -828,7 +828,7 @@ @Override public void run() throws Exception { numHighlights = 0; - TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true); + 
TermRangeFilter rf = TermRangeFilter.newStringRange("contents", "john", "john", true, true); SpanQuery clauses[] = { new SpanTermQuery(new Term("contents", "john")), new SpanTermQuery(new Term("contents", "kennedy")), }; SpanNearQuery snq = new SpanNearQuery(clauses, 1, true); @@ -851,7 +851,7 @@ @Override public void run() throws Exception { numHighlights = 0; - TermRangeFilter rf = new TermRangeFilter("contents", "john", "john", true, true); + TermRangeFilter rf = TermRangeFilter.newStringRange("contents", "john", "john", true, true); PhraseQuery pq = new PhraseQuery(); pq.add(new Term("contents", "john")); pq.add(new Term("contents", "kennedy")); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java (revision 1075074) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/ChainedFilterTest.java (working copy) @@ -84,7 +84,7 @@ //Date pastTheEnd = parseDate("2099 Jan 1"); // dateFilter = DateFilter.Before("date", pastTheEnd); // just treat dates as strings and select the whole range for now... 
- dateFilter = new TermRangeFilter("date","","ZZZZ",true,true); + dateFilter = TermRangeFilter.newStringRange("date","","ZZZZ",true,true); bobFilter = new QueryWrapperFilter( new TermQuery(new Term("owner", "bob"))); Index: lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java (revision 1075074) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java (working copy) @@ -70,7 +70,7 @@ private Filter getRangeFilter(String field,String lowerPrice, String upperPrice) { - Filter f = new TermRangeFilter(field,lowerPrice,upperPrice,true,true); + Filter f = TermRangeFilter.newStringRange(field,lowerPrice,upperPrice,true,true); return f; } private Filter getTermsFilter(String field,String text) Index: lucene/contrib/queries/src/test/org/apache/lucene/search/TestSlowCollationMethods.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/TestSlowCollationMethods.java (revision 0) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/TestSlowCollationMethods.java (revision 0) @@ -0,0 +1,141 @@ +package org.apache.lucene.search; + +import java.io.IOException; +import java.text.Collator; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; + 
+/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests SlowCollatedStringComparator, SlowCollatedTermRangeQuery, and SlowCollatedTermRangeFilter + */ +public class TestSlowCollationMethods extends LuceneTestCase { + private static Collator collator; + private static IndexSearcher searcher; + private static IndexReader reader; + private static Directory dir; + private static int numDocs; + + @BeforeClass + public static void beforeClass() throws Exception { + final Locale locale = LuceneTestCase.randomLocale(random); + collator = Collator.getInstance(locale); + collator.setStrength(Collator.IDENTICAL); + collator.setDecomposition(Collator.NO_DECOMPOSITION); + + numDocs = 1000 * RANDOM_MULTIPLIER; + dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, dir); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + String value = _TestUtil.randomUnicodeString(random); + Field field = newField("field", value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + doc.add(field); + iw.addDocument(doc); + } + reader = iw.getReader(); + iw.close(); + + // TODO: we should be able to use newSearcher, but there are synchronization problems in fieldcache + searcher = new 
IndexSearcher(reader); + //searcher = newSearcher(reader); + } + + @AfterClass + public static void afterClass() throws Exception { + searcher.close(); + reader.close(); + dir.close(); + collator = null; + searcher = null; + reader = null; + dir = null; + } + + public void testSort() throws Exception { + SortField sf = new SortField("field", new FieldComparatorSource() { + @Override + public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { + return new SlowCollatedStringComparator(numHits, fieldname, collator); + } + }); + TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, numDocs, new Sort(sf)); + String prev = ""; + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, prev) >= 0); + prev = value; + } + } + + private void doTestRanges(String startPoint, String endPoint, Query query) throws Exception { + // positive test + TopDocs docs = searcher.search(query, numDocs); + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, startPoint) >= 0); + assertTrue(collator.compare(value, endPoint) <= 0); + } + + // negative test + BooleanQuery bq = new BooleanQuery(); + bq.add(new MatchAllDocsQuery(), Occur.SHOULD); + bq.add(query, Occur.MUST_NOT); + docs = searcher.search(bq, numDocs); + for (ScoreDoc doc : docs.scoreDocs) { + String value = reader.document(doc.doc).get("field"); + assertTrue(collator.compare(value, startPoint) < 0 || collator.compare(value, endPoint) > 0); + } + } + + public void testRangeQuery() throws Exception { + int numQueries = 100*RANDOM_MULTIPLIER; + for (int i = 0; i < numQueries; i++) { + String startPoint = _TestUtil.randomUnicodeString(random); + String endPoint = _TestUtil.randomUnicodeString(random); + Query query = new SlowCollatedTermRangeQuery("field", startPoint, endPoint, true, true, collator); + 
doTestRanges(startPoint, endPoint, query); + } + } + + public void testRangeFilter() throws Exception { + int numQueries = 100*RANDOM_MULTIPLIER; + for (int i = 0; i < numQueries; i++) { + String startPoint = _TestUtil.randomUnicodeString(random); + String endPoint = _TestUtil.randomUnicodeString(random); + Query query = new ConstantScoreQuery(new SlowCollatedTermRangeFilter("field", startPoint, endPoint, true, true, collator)); + doTestRanges(startPoint, endPoint, query); + } + } +} Property changes on: lucene\contrib\queries\src\test\org\apache\lucene\search\TestSlowCollationMethods.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeTermsEnum.java (revision 0) @@ -0,0 +1,102 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; + +/** + * Subclass of FilteredTermsEnum for enumerating all terms that match the + * specified range parameters. + *

Term enumerations are always ordered by + * {@link #getComparator}. Each term in the enumeration is + * greater than all that precede it.

+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum { + private Collator collator; + private String upperTermText; + private String lowerTermText; + private boolean includeLower; + private boolean includeUpper; + + /** + * Enumerates all terms greater/equal than lowerTerm + * but less/equal than upperTerm. + * + * If an endpoint is null, it is said to be "open". Either or both + * endpoints may be open. Open endpoints may not be exclusive + * (you can't select all but the first or last term without + * explicitly specifying the term to exclude.) + * + * @param tenum + * @param lowerTermText + * The term text at the lower end of the range + * @param upperTermText + * The term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is included in the range. + * @param includeUpper + * If true, the upperTerm is included in the range. + * @param collator + * The collator to use to collate index Terms, to determine their + * membership in the range bounded by lowerTerm and + * upperTerm. + * + * @throws IOException + */ + public SlowCollatedTermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText, + boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + super(tenum); + this.collator = collator; + this.upperTermText = upperTermText; + this.lowerTermText = lowerTermText; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + + // do a little bit of normalization... + // open ended range queries should always be inclusive. 
+ if (this.lowerTermText == null) { + this.lowerTermText = ""; + this.includeLower = true; + } + + // TODO: optimize + BytesRef startBytesRef = new BytesRef(""); + setInitialSeekTerm(startBytesRef); + } + + @Override + protected AcceptStatus accept(BytesRef term) { + if ((includeLower + ? collator.compare(term.utf8ToString(), lowerTermText) >= 0 + : collator.compare(term.utf8ToString(), lowerTermText) > 0) + && (upperTermText == null + || (includeUpper + ? collator.compare(term.utf8ToString(), upperTermText) <= 0 + : collator.compare(term.utf8ToString(), upperTermText) < 0))) { + return AcceptStatus.YES; + } + return AcceptStatus.NO; + } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeTermsEnum.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0) @@ -0,0 +1,176 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.ToStringUtils; + +/** + * A Query that matches documents within a range of terms. + * + *

 This query matches the documents looking for terms that fall into the + * supplied range according to the provided + * {@link java.text.Collator}. It is not intended + * for numerical ranges; use {@link NumericRangeQuery} instead. + * + *

This query uses the {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * rewrite method. + * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeQuery extends MultiTermQuery { + private String lowerTerm; + private String upperTerm; + private boolean includeLower; + private boolean includeUpper; + private Collator collator; + + /** Constructs a query selecting all terms greater/equal than + * lowerTerm but less/equal than upperTerm. + *

+ * If an endpoint is null, it is said + * to be "open". Either or both endpoints may be open. Open endpoints may not + * be exclusive (you can't select all but the first or last term without + * explicitly specifying the term to exclude.) + *

+ * + * @param lowerTerm The Term text at the lower end of the range + * @param upperTerm The Term text at the upper end of the range + * @param includeLower + * If true, the lowerTerm is + * included in the range. + * @param includeUpper + * If true, the upperTerm is + * included in the range. + * @param collator The collator to use to collate index Terms, to determine + * their membership in the range bounded by lowerTerm and + * upperTerm. + */ + public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm, + boolean includeLower, boolean includeUpper, Collator collator) { + super(field); + this.lowerTerm = lowerTerm; + this.upperTerm = upperTerm; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.collator = collator; + } + + /** Returns the lower value of this range query */ + public String getLowerTerm() { return lowerTerm; } + + /** Returns the upper value of this range query */ + public String getUpperTerm() { return upperTerm; } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return includeLower; } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return includeUpper; } + + /** Returns the collator used to determine range inclusion */ + public Collator getCollator() { return collator; } + + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) { + return TermsEnum.EMPTY; + } + + TermsEnum tenum = terms.iterator(); + + if (lowerTerm == null && upperTerm == null) { + return tenum; + } + return new SlowCollatedTermRangeTermsEnum(tenum, + lowerTerm, upperTerm, includeLower, includeUpper, collator); + } + + /** @deprecated */ + @Deprecated + public String field() { + return getField(); + } + + /** Prints a user-readable version of this query. 
*/ + @Override + public String toString(String field) { + StringBuilder buffer = new StringBuilder(); + if (!getField().equals(field)) { + buffer.append(getField()); + buffer.append(":"); + } + buffer.append(includeLower ? '[' : '{'); + buffer.append(lowerTerm != null ? lowerTerm : "*"); + buffer.append(" TO "); + buffer.append(upperTerm != null ? upperTerm : "*"); + buffer.append(includeUpper ? ']' : '}'); + buffer.append(ToStringUtils.boost(getBoost())); + return buffer.toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((collator == null) ? 0 : collator.hashCode()); + result = prime * result + (includeLower ? 1231 : 1237); + result = prime * result + (includeUpper ? 1231 : 1237); + result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); + result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + SlowCollatedTermRangeQuery other = (SlowCollatedTermRangeQuery) obj; + if (collator == null) { + if (other.collator != null) + return false; + } else if (!collator.equals(other.collator)) + return false; + if (includeLower != other.includeLower) + return false; + if (includeUpper != other.includeUpper) + return false; + if (lowerTerm == null) { + if (other.lowerTerm != null) + return false; + } else if (!lowerTerm.equals(other.lowerTerm)) + return false; + if (upperTerm == null) { + if (other.upperTerm != null) + return false; + } else if (!upperTerm.equals(other.upperTerm)) + return false; + return true; + } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeQuery.java ___________________________________________________________________ Added: svn:eol-style + native Index: 
lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0) @@ -0,0 +1,70 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.Collator; + +/** + * A Filter that restricts search results to a range of term + * values in a given field. + * + *

 This filter matches the documents looking for terms that fall into the + * supplied range according to the provided + * {@link java.text.Collator}. It is not intended + * for numerical ranges; use {@link NumericRangeFilter} instead. + * + *

If you construct a large number of range filters with different ranges but on the + * same field, {@link FieldCacheRangeFilter} may have significantly better performance. + * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter { + /** + * + * @param lowerTerm The lower bound on this range + * @param upperTerm The upper bound on this range + * @param includeLower Does this range include the lower bound? + * @param includeUpper Does this range include the upper bound? + * @param collator The collator to use when determining range inclusion; set + * to null to use Unicode code point ordering instead of collation. + * @throws IllegalArgumentException if both terms are null or if + * lowerTerm is null and includeLower is true (similar for upperTerm + * and includeUpper) + */ + public SlowCollatedTermRangeFilter(String fieldName, String lowerTerm, String upperTerm, + boolean includeLower, boolean includeUpper, + Collator collator) { + super(new SlowCollatedTermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); + } + + /** Returns the lower value of this range filter */ + public String getLowerTerm() { return query.getLowerTerm(); } + + /** Returns the upper value of this range filter */ + public String getUpperTerm() { return query.getUpperTerm(); } + + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return query.includesLower(); } + + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return query.includesUpper(); } + + /** Returns the collator used to determine range inclusion, if any. 
*/ + public Collator getCollator() { return query.getCollator(); } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeFilter.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0) @@ -0,0 +1,106 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.text.Collator; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache.DocTerms; +import org.apache.lucene.util.BytesRef; + +/** Sorts by a field's value using the given Collator + * + *

WARNING: this is very slow; you'll + * get much better performance using the + * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. + * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public final class SlowCollatedStringComparator extends FieldComparator { + + private final String[] values; + private DocTerms currentDocTerms; + private final String field; + final Collator collator; + private String bottom; + private final BytesRef tempBR = new BytesRef(); + + public SlowCollatedStringComparator(int numHits, String field, Collator collator) { + values = new String[numHits]; + this.field = field; + this.collator = collator; + } + + @Override + public int compare(int slot1, int slot2) { + final String val1 = values[slot1]; + final String val2 = values[slot2]; + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return collator.compare(val1, val2); + } + + @Override + public int compareBottom(int doc) { + final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString(); + if (bottom == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return collator.compare(bottom, val2); + } + + @Override + public void copy(int slot, int doc) { + final BytesRef br = currentDocTerms.getTerm(doc, tempBR); + if (br == null) { + values[slot] = null; + } else { + values[slot] = br.utf8ToString(); + } + } + + @Override + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field); + return this; + } + + @Override + public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override + public Comparable value(int slot) { + final String s = values[slot]; + return s == null ? 
null : new BytesRef(values[slot]); + } +} Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedStringComparator.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/MIGRATE.txt =================================================================== --- lucene/MIGRATE.txt (revision 1075074) +++ lucene/MIGRATE.txt (working copy) @@ -313,6 +313,21 @@ - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader +* LUCENE-2514: The option to use a Collator's order (instead of binary order) for + sorting and range queries has been moved to contrib/queries. + + The Collated TermRangeQuery/Filter has been moved to SlowCollatedTermRangeQuery/Filter, + and the collated sorting has been moved to SlowCollatedStringComparator. + + Note: this functionality isn't very scalable and if you are using it, consider + indexing collation keys with the collation support in the analysis module instead. + + To perform collated range queries, use a suitable collating analyzer: CollationKeyAnalyzer + or ICUCollationKeyAnalyzer, and set qp.setAnalyzeRangeTerms(true). + + TermRangeQuery and TermRangeFilter now work purely on bytes. Both have helper factory methods + (newStringRange) similar to the NumericRange API, to easily perform range queries on Strings. + * LUCENE-2691: The near-real-time API has moved from IndexWriter to IndexReader. Instead of IndexWriter.getReader(), call IndexReader.open(IndexWriter) or IndexReader.reopen(IndexWriter).