Index: solr/src/java/org/apache/solr/schema/DateField.java =================================================================== --- solr/src/java/org/apache/solr/schema/DateField.java (revision 1075074) +++ solr/src/java/org/apache/solr/schema/DateField.java (working copy) @@ -410,7 +410,7 @@ /** DateField specific range query */ public Query getRangeQuery(QParser parser, SchemaField sf, Date part1, Date part2, boolean minInclusive, boolean maxInclusive) { - return new TermRangeQuery( + return TermRangeQuery.newStringRange( sf.getName(), part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2), Index: solr/src/java/org/apache/solr/schema/FieldType.java =================================================================== --- solr/src/java/org/apache/solr/schema/FieldType.java (revision 1075074) +++ solr/src/java/org/apache/solr/schema/FieldType.java (working copy) @@ -521,7 +521,7 @@ */ public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) { // constant score mode is now enabled per default - return new TermRangeQuery( + return TermRangeQuery.newStringRange( field.getName(), part1 == null ? null : toInternal(part1), part2 == null ? null : toInternal(part2), Index: solr/src/java/org/apache/solr/search/QueryParsing.java =================================================================== --- solr/src/java/org/apache/solr/search/QueryParsing.java (revision 1075074) +++ solr/src/java/org/apache/solr/search/QueryParsing.java (working copy) @@ -398,8 +398,8 @@ String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append(q.includesLower() ? 
'[' : '{'); - String lt = q.getLowerTerm(); - String ut = q.getUpperTerm(); + String lt = q.getLowerTerm().utf8ToString(); + String ut = q.getUpperTerm().utf8ToString(); if (lt == null) { out.append('*'); } else { Index: solr/src/java/org/apache/solr/handler/component/ShardDoc.java =================================================================== --- solr/src/java/org/apache/solr/handler/component/ShardDoc.java (revision 1075074) +++ solr/src/java/org/apache/solr/handler/component/ShardDoc.java (working copy) @@ -95,10 +95,10 @@ String fieldname = fields[i].getField(); comparators[i] = getCachedComparator(fieldname, fields[i] - .getType(), fields[i].getLocale(), fields[i].getComparatorSource()); + .getType(), fields[i].getComparatorSource()); if (fields[i].getType() == SortField.STRING) { - this.fields[i] = new SortField(fieldname, fields[i].getLocale(), + this.fields[i] = new SortField(fieldname, SortField.STRING, fields[i].getReverse()); } else { this.fields[i] = new SortField(fieldname, fields[i].getType(), @@ -145,17 +145,14 @@ return c < 0; } - Comparator getCachedComparator(String fieldname, int type, Locale locale, FieldComparatorSource factory) { + Comparator getCachedComparator(String fieldname, int type, FieldComparatorSource factory) { Comparator comparator = null; switch (type) { case SortField.SCORE: comparator = comparatorScore(fieldname); break; case SortField.STRING: - if (locale != null) - comparator = comparatorStringLocale(fieldname, locale); - else - comparator = comparatorNatural(fieldname); + comparator = comparatorNatural(fieldname); break; case SortField.CUSTOM: if (factory instanceof MissingStringLastComparatorSource){ Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (revision 1075074) +++ 
modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (working copy) @@ -20,6 +20,8 @@ import com.ibm.icu.text.Collator; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.util.BytesRef; import java.util.Locale; @@ -27,17 +29,23 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase { private Collator collator = Collator.getInstance(new Locale("fa")); - private Analyzer analyzer = new ICUCollationKeyAnalyzer(collator); + private Analyzer analyzer = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator); - private String firstRangeBeginning = encodeCollationKey + private BytesRef firstRangeBeginning = new BytesRef (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); - private String firstRangeEnd = encodeCollationKey + private BytesRef firstRangeEnd = new BytesRef (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); - private String secondRangeBeginning = encodeCollationKey + private BytesRef secondRangeBeginning = new BytesRef (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); - private String secondRangeEnd = encodeCollationKey + private BytesRef secondRangeEnd = new BytesRef (collator.getCollationKey(secondRangeEndOriginal).toByteArray()); - + + @Override + public void setUp() throws Exception { + super.setUp(); + assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec())); + } + public void testFarsiRangeFilterCollating() throws Exception { testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd, secondRangeBeginning, secondRangeEnd); @@ -62,13 +70,13 @@ // public void testCollationKeySort() throws Exception { Analyzer usAnalyzer = new ICUCollationKeyAnalyzer - (Collator.getInstance(Locale.US)); + (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US)); Analyzer franceAnalyzer = new 
ICUCollationKeyAnalyzer - (Collator.getInstance(Locale.FRANCE)); + (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE)); Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer - (Collator.getInstance(new Locale("sv", "se"))); + (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se"))); Analyzer denmarkAnalyzer = new ICUCollationKeyAnalyzer - (Collator.getInstance(new Locale("da", "dk"))); + (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk"))); // The ICU Collator and java.text.Collator implementations differ in their // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US. Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (revision 1075074) +++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (working copy) @@ -22,24 +22,26 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.util.BytesRef; import java.io.Reader; import java.util.Locale; - +/** @deprecated remove this when ICUCollationKeyFilter is removed */ +@Deprecated public class TestICUCollationKeyFilter extends CollationTestBase { private Collator collator = Collator.getInstance(new Locale("fa")); private Analyzer analyzer = new TestAnalyzer(collator); - private String firstRangeBeginning = encodeCollationKey - (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()); - private String firstRangeEnd = encodeCollationKey - (collator.getCollationKey(firstRangeEndOriginal).toByteArray()); - private String secondRangeBeginning = encodeCollationKey - (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()); - private String secondRangeEnd = encodeCollationKey - 
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()); + private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey + (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray())); + private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey + (collator.getCollationKey(firstRangeEndOriginal).toByteArray())); + private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey + (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray())); + private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey + (collator.getCollationKey(secondRangeEndOriginal).toByteArray())); public final class TestAnalyzer extends Analyzer { Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java (revision 0) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java (revision 0) @@ -0,0 +1,96 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; + +import com.ibm.icu.text.Collator; + +/** + *
+ * Converts each token into its {@link com.ibm.icu.text.CollationKey}, and + * then encodes bytes as an index term. + *
+ *+ * WARNING: Make sure you use exactly the same Collator at + * index and query time -- CollationKeys are only comparable when produced by + * the same Collator. {@link com.ibm.icu.text.RuleBasedCollator}s are + * independently versioned, so it is safe to search against stored + * CollationKeys if the following are exactly the same (best practice is + * to store this information with the index and check that they remain the + * same at query time): + *
+ *+ * CollationKeys generated by ICU Collators are not compatible with those + * generated by java.text.Collators. Specifically, if you use + * ICUCollationAttributeFactory to generate index terms, do not use + * {@link CollationAttributeFactory} on the query side, or vice versa. + *
+ *+ * ICUCollationAttributeFactory is significantly faster and generates significantly + * shorter keys than CollationAttributeFactory. See + * http://site.icu-project.org/charts/collation-icu4j-sun for key + * generation timing and key length comparisons between ICU4J and + * java.text.Collator over several languages. + *
+ */ +public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory { + private final Collator collator; + private final AttributeSource.AttributeFactory delegate; + + /** + * Create an ICUCollationAttributeFactory, using + * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the + * factory for all other attributes. + * @param collator CollationKey generator + */ + public ICUCollationAttributeFactory(Collator collator) { + this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator); + } + + /** + * Create an ICUCollationAttributeFactory, using the supplied Attribute + * Factory as the factory for all other attributes. + * @param delegate Attribute Factory + * @param collator CollationKey generator + */ + public ICUCollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) { + this.delegate = delegate; + this.collator = collator; + } + + @Override + public AttributeImpl createAttributeInstance( + Class extends Attribute> attClass) { + return attClass.isAssignableFrom(ICUCollatedTermAttributeImpl.class) + ? 
new ICUCollatedTermAttributeImpl(collator) + : delegate.createAttributeInstance(attClass); + } +} Property changes on: modules\analysis\icu\src\java\org\apache\lucene\collation\ICUCollationAttributeFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (revision 1075074) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (working copy) @@ -19,24 +19,20 @@ import com.ibm.icu.text.Collator; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.Tokenizer; - +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import org.apache.lucene.collation.CollationKeyAnalyzer; // javadocs +import org.apache.lucene.util.Version; import java.io.Reader; -import java.io.IOException; - /** ** Filters {@link KeywordTokenizer} with {@link ICUCollationKeyFilter}. *
* Converts the token into its {@link com.ibm.icu.text.CollationKey}, and - * then encodes the CollationKey with - * {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow it to + * then encodes the CollationKey either directly or with + * {@link IndexableBinaryStringTools} (see below), to allow it to * be stored as an index term. *
*@@ -70,39 +66,48 @@ * generation timing and key length comparisons between ICU4J and * java.text.Collator over several languages. *
+ * + *You must specify the required {@link Version} + * compatibility when creating ICUCollationKeyAnalyzer: + *
+ * Converts each token into its {@link java.text.CollationKey}, and then + * encodes the bytes as an index term. + *
+ *+ * WARNING: Make sure you use exactly the same Collator at + * index and query time -- CollationKeys are only comparable when produced by + * the same Collator. Since {@link java.text.RuleBasedCollator}s are not + * independently versioned, it is unsafe to search against stored + * CollationKeys unless the following are exactly the same (best practice is + * to store this information with the index and check that they remain the + * same at query time): + *
+ *
+ * The ICUCollationAttributeFactory in the icu package of Lucene's
+ * contrib area uses ICU4J's Collator, which makes its
+ * version available, thus allowing collation to be versioned independently
+ * from the JVM. ICUCollationAttributeFactory is also significantly faster and
+ * generates significantly shorter keys than CollationAttributeFactory. See
+ * http://site.icu-project.org/charts/collation-icu4j-sun for key
+ * generation timing and key length comparisons between ICU4J and
+ * java.text.Collator over several languages.
+ *
+ * CollationKeys generated by java.text.Collators are not compatible + * with those generated by ICU Collators. Specifically, if you use + * CollationAttributeFactory to generate index terms, do not use + * ICUCollationAttributeFactory on the query side, or vice versa. + * 
+ */ +public class CollationAttributeFactory extends AttributeSource.AttributeFactory { + private final Collator collator; + private final AttributeSource.AttributeFactory delegate; + + /** + * Create a CollationAttributeFactory, using + * {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY} as the + * factory for all other attributes. + * @param collator CollationKey generator + */ + public CollationAttributeFactory(Collator collator) { + this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator); + } + + /** + * Create a CollationAttributeFactory, using the supplied Attribute Factory + * as the factory for all other attributes. + * @param delegate Attribute Factory + * @param collator CollationKey generator + */ + public CollationAttributeFactory(AttributeSource.AttributeFactory delegate, Collator collator) { + this.delegate = delegate; + this.collator = collator; + } + + @Override + public AttributeImpl createAttributeInstance( + Class extends Attribute> attClass) { + return attClass.isAssignableFrom(CollatedTermAttributeImpl.class) + ? 
new CollatedTermAttributeImpl(collator) + : delegate.createAttributeInstance(attClass); + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\collation\CollationAttributeFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (revision 1075074) +++ modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (working copy) @@ -18,14 +18,13 @@ */ -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; +import org.apache.lucene.util.IndexableBinaryStringTools; // javadoc @link +import org.apache.lucene.util.Version; import java.text.Collator; import java.io.Reader; -import java.io.IOException; /** *@@ -33,8 +32,8 @@ *
** Converts the token into its {@link java.text.CollationKey}, and then - * encodes the CollationKey with - * {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow + * encodes the CollationKey either directly or with + * {@link IndexableBinaryStringTools} (see below), to allow * it to be stored as an index term. *
*@@ -75,39 +74,49 @@ * CollationKeyAnalyzer to generate index terms, do not use * ICUCollationKeyAnalyzer on the query side, or vice versa. *
+ * + *You must specify the required {@link Version} + * compatibility when creating CollationKeyAnalyzer: + *
null. The collators
- * correspond to any SortFields which were given a specific locale.
- * @param fields Array of sort fields.
- * @return Array, possibly null.
- */
- private Collator[] hasCollators (final SortField[] fields) {
- if (fields == null) return null;
- Collator[] ret = new Collator[fields.length];
- for (int i=0; ib.
* @param a ScoreDoc
@@ -109,11 +82,9 @@
c = (s2 == null) ? 0 : -1;
} else if (s2 == null) {
c = 1;
- } else if (fields[i].getLocale() == null) {
- c = s1.compareTo(s2);
} else {
- c = collators[i].compare(s1.utf8ToString(), s2.utf8ToString());
- }
+ c = s1.compareTo(s2);
+ }
} else {
c = docA.fields[i].compareTo(docB.fields[i]);
if (type == SortField.SCORE) {
Index: lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 1075074)
+++ lucene/src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy)
@@ -18,7 +18,6 @@
*/
import java.io.IOException;
-import java.text.Collator;
import java.util.Comparator;
import org.apache.lucene.index.TermsEnum;
@@ -33,11 +32,8 @@
*/
public class TermRangeTermsEnum extends FilteredTermsEnum {
- private Collator collator;
- private String upperTermText;
- private String lowerTermText;
- private boolean includeLower;
- private boolean includeUpper;
+ final private boolean includeLower;
+ final private boolean includeUpper;
final private BytesRef lowerBytesRef;
final private BytesRef upperBytesRef;
private final ComparatorlowerTerm is included in the range.
* @param includeUpper
* If true, the upperTerm is included in the range.
- * @param collator
- * The collator to use to collate index Terms, to determine their
- * membership in the range bounded by lowerTerm and
- * upperTerm.
*
* @throws IOException
*/
- public TermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText,
- boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+ public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm,
+ boolean includeLower, boolean includeUpper) throws IOException {
super(tenum);
- this.collator = collator;
- this.upperTermText = upperTermText;
- this.lowerTermText = lowerTermText;
- this.includeLower = includeLower;
- this.includeUpper = includeUpper;
// do a little bit of normalization...
// open ended range queries should always be inclusive.
- if (this.lowerTermText == null) {
- this.lowerTermText = "";
+ if (lowerTerm == null) {
+ this.lowerBytesRef = new BytesRef();
this.includeLower = true;
+ } else {
+ this.lowerBytesRef = lowerTerm;
+ this.includeLower = includeLower;
}
- lowerBytesRef = new BytesRef(this.lowerTermText);
- if (this.upperTermText == null) {
+ if (upperTerm == null) {
this.includeUpper = true;
upperBytesRef = null;
} else {
- upperBytesRef = new BytesRef(upperTermText);
+ this.includeUpper = includeUpper;
+ upperBytesRef = upperTerm;
}
- BytesRef startBytesRef = (collator == null) ? lowerBytesRef : new BytesRef("");
- setInitialSeekTerm(startBytesRef);
+ setInitialSeekTerm(lowerBytesRef);
termComp = getComparator();
}
@Override
protected AcceptStatus accept(BytesRef term) {
- if (collator == null) {
- if (!this.includeLower && term.equals(lowerBytesRef))
- return AcceptStatus.NO;
- // Use this field's default sort ordering
- if (upperBytesRef != null) {
- final int cmp = termComp.compare(upperBytesRef, term);
- /*
- * if beyond the upper term, or is exclusive and this is equal to
- * the upper term, break out
- */
- if ((cmp < 0) ||
- (!includeUpper && cmp==0)) {
- return AcceptStatus.END;
- }
- }
- return AcceptStatus.YES;
- } else {
- if ((includeLower
- ? collator.compare(term.utf8ToString(), lowerTermText) >= 0
- : collator.compare(term.utf8ToString(), lowerTermText) > 0)
- && (upperTermText == null
- || (includeUpper
- ? collator.compare(term.utf8ToString(), upperTermText) <= 0
- : collator.compare(term.utf8ToString(), upperTermText) < 0))) {
- return AcceptStatus.YES;
- }
+ if (!this.includeLower && term.equals(lowerBytesRef))
return AcceptStatus.NO;
+
+ // Use this field's default sort ordering
+ if (upperBytesRef != null) {
+ final int cmp = termComp.compare(upperBytesRef, term);
+ /*
+ * if beyond the upper term, or is exclusive and this is equal to
+ * the upper term, break out
+ */
+ if ((cmp < 0) ||
+ (!includeUpper && cmp==0)) {
+ return AcceptStatus.END;
+ }
}
+
+ return AcceptStatus.YES;
}
}
Index: lucene/src/java/org/apache/lucene/search/TermRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (revision 1075074)
+++ lucene/src/java/org/apache/lucene/search/TermRangeFilter.java (working copy)
@@ -1,5 +1,7 @@
package org.apache.lucene.search;
+import org.apache.lucene.util.BytesRef;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,15 +19,13 @@
* limitations under the License.
*/
-import java.text.Collator;
-
/**
* A Filter that restricts search results to a range of term
* values in a given field.
*
* This filter matches the documents looking for terms that fall into the
* supplied range according to {@link
- * String#compareTo(String)}, unless a Collator is provided. It is not intended
+ * Byte#compareTo(Byte)}. It is not intended
* for numerical ranges; use {@link NumericRangeFilter} instead.
*
*
If you construct a large number of range filters with different ranges but on the
@@ -44,39 +44,25 @@
* lowerTerm is null and includeLower is true (similar for upperTerm
* and includeUpper)
*/
- public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
+ public TermRangeFilter(String fieldName, BytesRef lowerTerm, BytesRef upperTerm,
boolean includeLower, boolean includeUpper) {
super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
}
/**
- * WARNING: Using this constructor and supplying a non-null
- * value in the collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The lower bound on this range
- * @param upperTerm The upper bound on this range
- * @param includeLower Does this range include the lower bound?
- * @param includeUpper Does this range include the upper bound?
- * @param collator The collator to use when determining range inclusion; set
- * to null to use Unicode code point ordering instead of collation.
- * @throws IllegalArgumentException if both terms are null or if
- * lowerTerm is null and includeLower is true (similar for upperTerm
- * and includeUpper)
+ * Factory that creates a new TermRangeFilter using Strings for term text.
*/
- public TermRangeFilter(String fieldName, String lowerTerm, String upperTerm,
- boolean includeLower, boolean includeUpper,
- Collator collator) {
- super(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
+ public static TermRangeFilter newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+ BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
+ BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
+ return new TermRangeFilter(field, lower, upper, includeLower, includeUpper);
}
-
+
/**
* Constructs a filter for field fieldName matching
* less than or equal to upperTerm.
*/
- public static TermRangeFilter Less(String fieldName, String upperTerm) {
+ public static TermRangeFilter Less(String fieldName, BytesRef upperTerm) {
return new TermRangeFilter(fieldName, null, upperTerm, false, true);
}
@@ -84,22 +70,19 @@
* Constructs a filter for field fieldName matching
* greater than or equal to lowerTerm.
*/
- public static TermRangeFilter More(String fieldName, String lowerTerm) {
+ public static TermRangeFilter More(String fieldName, BytesRef lowerTerm) {
return new TermRangeFilter(fieldName, lowerTerm, null, true, false);
}
/** Returns the lower value of this range filter */
- public String getLowerTerm() { return query.getLowerTerm(); }
+ public BytesRef getLowerTerm() { return query.getLowerTerm(); }
/** Returns the upper value of this range filter */
- public String getUpperTerm() { return query.getUpperTerm(); }
+ public BytesRef getUpperTerm() { return query.getUpperTerm(); }
/** Returns true if the lower endpoint is inclusive */
public boolean includesLower() { return query.includesLower(); }
/** Returns true if the upper endpoint is inclusive */
public boolean includesUpper() { return query.includesUpper(); }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return query.getCollator(); }
}
Index: lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (revision 1075074)
+++ lucene/src/java/org/apache/lucene/search/TermRangeQuery.java (working copy)
@@ -18,11 +18,11 @@
*/
import java.io.IOException;
-import java.text.Collator;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
/**
@@ -30,7 +30,7 @@
*
*
This query matches the documents looking for terms that fall into the
* supplied range according to {@link
- * String#compareTo(String)}, unless a Collator is provided. It is not intended
+ * Byte#compareTo(Byte)}. It is not intended
* for numerical ranges; use {@link NumericRangeQuery} instead.
*
*
This query uses the {@link
@@ -40,9 +40,8 @@
*/
public class TermRangeQuery extends MultiTermQuery {
- private String lowerTerm;
- private String upperTerm;
- private Collator collator;
+ private BytesRef lowerTerm;
+ private BytesRef upperTerm;
private boolean includeLower;
private boolean includeUpper;
@@ -69,78 +68,48 @@
* If true, the upperTerm is
* included in the range.
*/
- public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
- this(field, lowerTerm, upperTerm, includeLower, includeUpper, null);
- }
-
- /** Constructs a query selecting all terms greater/equal than
- * lowerTerm but less/equal than upperTerm.
- *
- * If an endpoint is null, it is said - * to be "open". Either or both endpoints may be open. Open endpoints may not - * be exclusive (you can't select all but the first or last term without - * explicitly specifying the term to exclude.) - *
- * If collator is not null, it will be used to decide whether
- * index terms are within the given range, rather than using the Unicode code
- * point order in which index terms are stored.
- *
- * WARNING: Using this constructor and supplying a non-null
- * value in the collator parameter will cause every single
- * index Term in the Field referenced by lowerTerm and/or upperTerm to be
- * examined. Depending on the number of index Terms in this Field, the
- * operation could be very slow.
- *
- * @param lowerTerm The Term text at the lower end of the range
- * @param upperTerm The Term text at the upper end of the range
- * @param includeLower
- * If true, the lowerTerm is
- * included in the range.
- * @param includeUpper
- * If true, the upperTerm is
- * included in the range.
- * @param collator The collator to use to collate index Terms, to determine
- * their membership in the range bounded by lowerTerm and
- * upperTerm.
- */
- public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
- Collator collator) {
+ public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
super(field);
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
- this.collator = collator;
}
+ /**
+ * Factory that creates a new TermRangeQuery using Strings for term text.
+ */
+ public static TermRangeQuery newStringRange(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
+ BytesRef lower = lowerTerm == null ? null : new BytesRef(lowerTerm);
+ BytesRef upper = upperTerm == null ? null : new BytesRef(upperTerm);
+ return new TermRangeQuery(field, lower, upper, includeLower, includeUpper);
+ }
+
/** Returns the lower value of this range query */
- public String getLowerTerm() { return lowerTerm; }
+ public BytesRef getLowerTerm() { return lowerTerm; }
/** Returns the upper value of this range query */
- public String getUpperTerm() { return upperTerm; }
+ public BytesRef getUpperTerm() { return upperTerm; }
/** Returns true if the lower endpoint is inclusive */
public boolean includesLower() { return includeLower; }
/** Returns true if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }
-
- /** Returns the collator used to determine range inclusion, if any. */
- public Collator getCollator() { return collator; }
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
+ if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
return TermsEnum.EMPTY;
}
TermsEnum tenum = terms.iterator();
- if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) {
+ if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
return tenum;
}
return new TermRangeTermsEnum(tenum,
- lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ lowerTerm, upperTerm, includeLower, includeUpper);
}
/** Prints a user-readable version of this query. */
@@ -152,9 +121,10 @@
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
- buffer.append(lowerTerm != null ? ("*".equals(lowerTerm) ? "\\*" : lowerTerm) : "*");
+ // TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
+ buffer.append(lowerTerm != null ? ("*".equals(lowerTerm.utf8ToString()) ? "\\*" : lowerTerm.utf8ToString()) : "*");
buffer.append(" TO ");
- buffer.append(upperTerm != null ? ("*".equals(upperTerm) ? "\\*" : upperTerm) : "*");
+ buffer.append(upperTerm != null ? ("*".equals(upperTerm.utf8ToString()) ? "\\*" : upperTerm.utf8ToString()) : "*");
buffer.append(includeUpper ? ']' : '}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
@@ -164,7 +134,6 @@
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
- result = prime * result + ((collator == null) ? 0 : collator.hashCode());
result = prime * result + (includeLower ? 1231 : 1237);
result = prime * result + (includeUpper ? 1231 : 1237);
result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
@@ -181,11 +150,6 @@
if (getClass() != obj.getClass())
return false;
TermRangeQuery other = (TermRangeQuery) obj;
- if (collator == null) {
- if (other.collator != null)
- return false;
- } else if (!collator.equals(other.collator))
- return false;
if (includeLower != other.includeLower)
return false;
if (includeUpper != other.includeUpper)
Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 1075074)
+++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy)
@@ -18,8 +18,6 @@
*/
import java.io.IOException;
-import java.text.Collator;
-import java.util.Locale;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache.DocTermsIndex;
@@ -718,85 +716,6 @@
}
}
-
- /** Sorts by a field's value using the Collator for a
- * given Locale.
- *
- * <p><b>WARNING</b>: this is likely very slow; you'll
- * get much better performance using the
- * CollationKeyAnalyzer or ICUCollationKeyAnalyzer. */
- public static final class StringComparatorLocale extends FieldComparator {
-
- private final String[] values;
- private DocTerms currentDocTerms;
- private final String field;
- final Collator collator;
- private String bottom;
- private final BytesRef tempBR = new BytesRef();
-
- StringComparatorLocale(int numHits, String field, Locale locale) {
- values = new String[numHits];
- this.field = field;
- collator = Collator.getInstance(locale);
- }
-
- @Override
- public int compare(int slot1, int slot2) {
- final String val1 = values[slot1];
- final String val2 = values[slot2];
- if (val1 == null) {
- if (val2 == null) {
- return 0;
- }
- return -1;
- } else if (val2 == null) {
- return 1;
- }
- return collator.compare(val1, val2);
- }
-
- @Override
- public int compareBottom(int doc) {
- final String val2 = currentDocTerms.getTerm(doc, tempBR).utf8ToString();
- if (bottom == null) {
- if (val2 == null) {
- return 0;
- }
- return -1;
- } else if (val2 == null) {
- return 1;
- }
- return collator.compare(bottom, val2);
- }
-
- @Override
- public void copy(int slot, int doc) {
- final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
- if (br == null) {
- values[slot] = null;
- } else {
- values[slot] = br.utf8ToString();
- }
- }
-
- @Override
- public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
- currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field);
- return this;
- }
-
- @Override
- public void setBottom(final int bottom) {
- this.bottom = values[bottom];
- }
-
- @Override
- public Comparable> value(int slot) {
- final String s = values[slot];
- return s == null ? null : new BytesRef(values[slot]);
- }
- }
-
/** Sorts by field's natural Term sort order, using
* ordinals. This is functionally equivalent to {@link
* TermValComparator}, but it first resolves the string
Index: lucene/src/java/org/apache/lucene/search/SortField.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SortField.java (revision 1075074)
+++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy)
@@ -18,7 +18,6 @@
*/
import java.io.IOException;
-import java.util.Locale;
import org.apache.lucene.search.cache.*;
import org.apache.lucene.util.StringHelper;
@@ -90,7 +89,6 @@
private String field;
private int type; // defaults to determining type dynamically
- private Locale locale; // defaults to "natural order" (no Locale)
boolean reverse = false; // defaults to natural order
private CachedArrayCreator> creator;
public Object missingValue = null; // used for 'sortMissingFirst/Last'
@@ -213,29 +211,7 @@
}
return this;
}
-
- /** Creates a sort by terms in the given field sorted
- * according to the given locale.
- * @param field Name of field to sort by, cannot be null.
- * @param locale Locale of values in the field.
- */
- public SortField (String field, Locale locale) {
- initFieldType(field, STRING);
- this.locale = locale;
- }
-
- /** Creates a sort, possibly in reverse, by terms in the given field sorted
- * according to the given locale.
- * @param field Name of field to sort by, cannot be null.
- * @param locale Locale of values in the field.
- */
- public SortField (String field, Locale locale, boolean reverse) {
- initFieldType(field, STRING);
- this.locale = locale;
- this.reverse = reverse;
- }
-
/** Creates a sort with a custom comparison function.
* @param field Name of field to sort by; cannot be null.
* @param comparator Returns a comparator for sorting hits.
@@ -295,14 +271,6 @@
return type;
}
- /** Returns the Locale by which term values are interpreted.
- * May return null if no Locale was specified.
- * @return Locale, or null.
- */
- public Locale getLocale() {
- return locale;
- }
-
/** Returns the instance of a {@link FieldCache} parser that fits to the given sort type.
* May return null if no parser was specified. Sorting is using the default parser then.
* @return An instance of a {@link FieldCache} parser, or null.
@@ -384,7 +352,6 @@
break;
}
- if (locale != null) buffer.append('(').append(locale).append(')');
if (creator != null) buffer.append('(').append(creator).append(')');
if (reverse) buffer.append('!');
@@ -404,7 +371,6 @@
other.field == this.field // field is always interned
&& other.type == this.type
&& other.reverse == this.reverse
- && (other.locale == null ? this.locale == null : other.locale.equals(this.locale))
&& (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource))
&& (other.creator == null ? this.creator == null : other.creator.equals(this.creator))
);
@@ -419,7 +385,6 @@
public int hashCode() {
int hash=type^0x346565dd + Boolean.valueOf(reverse).hashCode()^0xaf5998bb;
if (field != null) hash += field.hashCode()^0xff5685dd;
- if (locale != null) hash += locale.hashCode()^0x08150815;
if (comparatorSource != null) hash += comparatorSource.hashCode();
if (creator != null) hash += creator.hashCode()^0x3aaf56ff;
return hash;
@@ -439,13 +404,6 @@
*/
public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException {
- if (locale != null) {
- // TODO: it'd be nice to allow FieldCache.getStringIndex
- // to optionally accept a Locale so sorting could then use
- // the faster StringComparator impls
- return new FieldComparator.StringComparatorLocale(numHits, field, locale);
- }
-
switch (type) {
case SortField.SCORE:
return new FieldComparator.RelevanceComparator(numHits);
Index: lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (revision 1075074)
+++ lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (working copy)
@@ -39,7 +39,10 @@
*
null it will also be used to parse the date value. null, it's set on the
- * {@link RangeQueryNode}. capital of Hungary is equal to
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (revision 1075074)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/RangeCollatorAttributeImpl.java (working copy)
@@ -1,92 +0,0 @@
-package org.apache.lucene.queryParser.standard.config;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.text.Collator;
-
-import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
-import org.apache.lucene.queryParser.standard.processors.ParametricRangeQueryNodeProcessor;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.util.AttributeImpl;
-
-/**
- * This attribute is used by {@link ParametricRangeQueryNodeProcessor} processor
- * and must be defined in the {@link QueryConfigHandler}. This attribute tells
- * the processor which {@link Collator} should be used for a
- * {@link TermRangeQuery} Term enumerations are always ordered by + * {@link #getComparator}. Each term in the enumeration is + * greater than all that precede it.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead. + * This class will be removed in Lucene 5.0 + */ +@Deprecated +public class SlowCollatedTermRangeTermsEnum extends FilteredTermsEnum { + private Collator collator; + private String upperTermText; + private String lowerTermText; + private boolean includeLower; + private boolean includeUpper; + + /** + * Enumerates all terms greater/equal thanlowerTerm
+ * but less/equal than upperTerm.
+ *
+ * If an endpoint is null, it is said to be "open". Either or both
+ * endpoints may be open. Open endpoints may not be exclusive
+ * (you can't select all but the first or last term without
+ * explicitly specifying the term to exclude.)
+ *
+ * @param tenum
+ * @param lowerTermText
+ * The term text at the lower end of the range
+ * @param upperTermText
+ * The term text at the upper end of the range
+ * @param includeLower
+ * If true, the lowerTerm is included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is included in the range.
+ * @param collator
+ * The collator to use to collate index Terms, to determine their
+ * membership in the range bounded by lowerTerm and
+ * upperTerm.
+ *
+ * @throws IOException
+ */
+ public SlowCollatedTermRangeTermsEnum(TermsEnum tenum, String lowerTermText, String upperTermText,
+ boolean includeLower, boolean includeUpper, Collator collator) throws IOException {
+ super(tenum);
+ this.collator = collator;
+ this.upperTermText = upperTermText;
+ this.lowerTermText = lowerTermText;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+
+ // do a little bit of normalization...
+ // open ended range queries should always be inclusive.
+ if (this.lowerTermText == null) {
+ this.lowerTermText = "";
+ this.includeLower = true;
+ }
+
+ // TODO: optimize
+ BytesRef startBytesRef = new BytesRef("");
+ setInitialSeekTerm(startBytesRef);
+ }
+
+  @Override
+  protected AcceptStatus accept(BytesRef term) {
+    final String text = term.utf8ToString(); // decode once; both bound checks collate the same text
+    final int lowerCmp = collator.compare(text, lowerTermText);
+    if (includeLower ? lowerCmp < 0 : lowerCmp <= 0) {
+      return AcceptStatus.NO; // collates before the lower bound, or equals an excluded one
+    }
+    if (upperTermText == null) {
+      return AcceptStatus.YES; // null upper bound: nothing further to test
+    }
+    final int upperCmp = collator.compare(text, upperTermText);
+    return (includeUpper ? upperCmp <= 0 : upperCmp < 0) ? AcceptStatus.YES : AcceptStatus.NO;
+  }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeTermsEnum.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeQuery.java (revision 0)
@@ -0,0 +1,176 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ToStringUtils;
+
+/**
+ * A Query that matches documents within a range of terms.
+ *
+ * This query matches the documents looking for terms that fall into the
+ * supplied range according to the order of the supplied {@link Collator}, not
+ * {@link String#compareTo(String)}; a Collator is required whenever a bound is set. It is not intended
+ * for numerical ranges; use {@link NumericRangeQuery} instead.
+ *
+ * <p>This query uses the {@link
+ * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * rewrite method.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ * This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public class SlowCollatedTermRangeQuery extends MultiTermQuery {
+ private String lowerTerm;
+ private String upperTerm;
+ private boolean includeLower;
+ private boolean includeUpper;
+ private Collator collator;
+
+ /** Constructs a query selecting all terms greater/equal than
+ * lowerTerm but less/equal than upperTerm.
+ * <p>
+ * If an endpoint is null, it is said
+ * to be "open". Either or both endpoints may be open. Open endpoints may not
+ * be exclusive (you can't select all but the first or last term without
+ * explicitly specifying the term to exclude.)
+ * <p>
+ *
+ * @param lowerTerm The Term text at the lower end of the range
+ * @param upperTerm The Term text at the upper end of the range
+ * @param includeLower
+ * If true, the lowerTerm is
+ * included in the range.
+ * @param includeUpper
+ * If true, the upperTerm is
+ * included in the range.
+ * @param collator The collator to use to collate index Terms, to determine
+ * their membership in the range bounded by lowerTerm and
+ * upperTerm.
+ */
+ public SlowCollatedTermRangeQuery(String field, String lowerTerm, String upperTerm,
+ boolean includeLower, boolean includeUpper, Collator collator) {
+ super(field);
+ this.lowerTerm = lowerTerm;
+ this.upperTerm = upperTerm;
+ this.includeLower = includeLower;
+ this.includeUpper = includeUpper;
+ this.collator = collator;
+ }
+
+ /** Returns the lower value of this range query */
+ public String getLowerTerm() { return lowerTerm; }
+
+ /** Returns the upper value of this range query */
+ public String getUpperTerm() { return upperTerm; }
+
+ /** Returns true if the lower endpoint is inclusive */
+ public boolean includesLower() { return includeLower; }
+
+ /** Returns true if the upper endpoint is inclusive */
+ public boolean includesUpper() { return includeUpper; }
+
+ /** Returns the collator used to determine range inclusion */
+ public Collator getCollator() { return collator; }
+
+ @Override
+ protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
+ if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
+ return TermsEnum.EMPTY;
+ }
+
+ TermsEnum tenum = terms.iterator();
+
+ if (lowerTerm == null && upperTerm == null) {
+ return tenum;
+ }
+ return new SlowCollatedTermRangeTermsEnum(tenum,
+ lowerTerm, upperTerm, includeLower, includeUpper, collator);
+ }
+
+ /** @deprecated */
+ @Deprecated
+ public String field() {
+ return getField();
+ }
+
+ /** Prints a user-readable version of this query. */
+ @Override
+ public String toString(String field) {
+ StringBuilder buffer = new StringBuilder();
+ if (!getField().equals(field)) {
+ buffer.append(getField());
+ buffer.append(":");
+ }
+ buffer.append(includeLower ? '[' : '{');
+ buffer.append(lowerTerm != null ? lowerTerm : "*");
+ buffer.append(" TO ");
+ buffer.append(upperTerm != null ? upperTerm : "*");
+ buffer.append(includeUpper ? ']' : '}');
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((collator == null) ? 0 : collator.hashCode());
+ result = prime * result + (includeLower ? 1231 : 1237);
+ result = prime * result + (includeUpper ? 1231 : 1237);
+ result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode());
+ result = prime * result + ((upperTerm == null) ? 0 : upperTerm.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!super.equals(obj))
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ SlowCollatedTermRangeQuery other = (SlowCollatedTermRangeQuery) obj;
+ if (collator == null) {
+ if (other.collator != null)
+ return false;
+ } else if (!collator.equals(other.collator))
+ return false;
+ if (includeLower != other.includeLower)
+ return false;
+ if (includeUpper != other.includeUpper)
+ return false;
+ if (lowerTerm == null) {
+ if (other.lowerTerm != null)
+ return false;
+ } else if (!lowerTerm.equals(other.lowerTerm))
+ return false;
+ if (upperTerm == null) {
+ if (other.upperTerm != null)
+ return false;
+ } else if (!upperTerm.equals(other.upperTerm))
+ return false;
+ return true;
+ }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeQuery.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedTermRangeFilter.java (revision 0)
@@ -0,0 +1,70 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.Collator;
+
+/**
+ * A Filter that restricts search results to a range of term
+ * values in a given field.
+ *
+ * <p>This filter matches the documents looking for terms that fall into the
+ * supplied range according to {@link
+ * String#compareTo(String)}, unless a Collator is provided. It is not intended
+ * for numerical ranges; use {@link NumericRangeFilter} instead.
+ *
+ * <p>If you construct a large number of range filters with different ranges but on the
+ * same field, {@link FieldCacheRangeFilter} may have significantly better performance.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ * This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public class SlowCollatedTermRangeFilter extends MultiTermQueryWrapperFilter WARNING: this is very slow; you'll
+ * get much better performance using the
+ * CollationKeyAnalyzer or ICUCollationKeyAnalyzer.
+ * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
+ * This class will be removed in Lucene 5.0
+ */
+@Deprecated
+public final class SlowCollatedStringComparator extends FieldComparator {
+
+ private final String[] values;
+ private DocTerms currentDocTerms;
+ private final String field;
+ final Collator collator;
+ private String bottom;
+ private final BytesRef tempBR = new BytesRef();
+
+ public SlowCollatedStringComparator(int numHits, String field, Collator collator) {
+ values = new String[numHits];
+ this.field = field;
+ this.collator = collator;
+ }
+
+ @Override
+ public int compare(int slot1, int slot2) {
+ final String val1 = values[slot1];
+ final String val2 = values[slot2];
+ if (val1 == null) {
+ if (val2 == null) {
+ return 0;
+ }
+ return -1;
+ } else if (val2 == null) {
+ return 1;
+ }
+ return collator.compare(val1, val2);
+ }
+
+  @Override
+  public int compareBottom(int doc) {
+    // Guard against a null term the same way copy() does; null sorts before any non-null value.
+    final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
+    final String val2 = br == null ? null : br.utf8ToString();
+    if (bottom == null) {
+      return val2 == null ? 0 : -1;
+    }
+    if (val2 == null) {
+      return 1;
+    }
+    return collator.compare(bottom, val2);
+  }
+
+ @Override
+ public void copy(int slot, int doc) {
+ final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
+ if (br == null) {
+ values[slot] = null;
+ } else {
+ values[slot] = br.utf8ToString();
+ }
+ }
+
+ @Override
+ public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+ currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader, field);
+ return this;
+ }
+
+ @Override
+ public void setBottom(final int bottom) {
+ this.bottom = values[bottom];
+ }
+
+  @Override
+  public Comparable<?> value(int slot) {
+    final String s = values[slot]; // a slot may hold null
+    return s == null ? null : new BytesRef(s); // reuse the local instead of re-reading the array
+  }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedStringComparator.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/MIGRATE.txt
===================================================================
--- lucene/MIGRATE.txt (revision 1075074)
+++ lucene/MIGRATE.txt (working copy)
@@ -313,6 +313,21 @@
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+* LUCENE-2514: The option to use a Collator's order (instead of binary order) for
+ sorting and range queries has been moved to contrib/queries.
+
+ The Collated TermRangeQuery/Filter has been moved to SlowCollatedTermRangeQuery/Filter,
+ and the collated sorting has been moved to SlowCollatedStringComparator.
+
+ Note: this functionality isn't very scalable and if you are using it, consider
+ indexing collation keys with the collation support in the analysis module instead.
+
+ To perform collated range queries, use a suitable collating analyzer: CollationKeyAnalyzer
+ or ICUCollationKeyAnalyzer, and set qp.setAnalyzeRangeTerms(true).
+
+ TermRangeQuery and TermRangeFilter now work purely on bytes. Both have helper factory methods
+ (newStringRange) similar to the NumericRange API, to easily perform range queries on Strings.
+
* LUCENE-2691: The near-real-time API has moved from IndexWriter to
IndexReader. Instead of IndexWriter.getReader(), call
IndexReader.open(IndexWriter) or IndexReader.reopen(IndexWriter).
true if the lower endpoint is inclusive */
+ public boolean includesLower() { return query.includesLower(); }
+
+ /** Returns true if the upper endpoint is inclusive */
+ public boolean includesUpper() { return query.includesUpper(); }
+
+ /** Returns the collator used to determine range inclusion, if any. */
+ public Collator getCollator() { return query.getCollator(); }
+}
Property changes on: lucene\contrib\queries\src\java\org\apache\lucene\search\SlowCollatedTermRangeFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/SlowCollatedStringComparator.java (revision 0)
@@ -0,0 +1,106 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.FieldCache.DocTerms;
+import org.apache.lucene.util.BytesRef;
+
+/** Sorts by a field's value using the given Collator
+ *
+ *