Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (revision 965632) +++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (working copy) @@ -59,8 +59,9 @@ // // Copied (and slightly modified) from // org.apache.lucene.search.TestSort.testInternationalSort() - // - public void testCollationKeySort() throws Exception { + // + @Deprecated + public void testCollationKeySortBackwards() throws Exception { Analyzer usAnalyzer = new ICUCollationKeyAnalyzer (Collator.getInstance(Locale.US)); Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer @@ -75,4 +76,20 @@ testCollationKeySort (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD"); } + + public void testCollationKeySort() throws Exception { + Analyzer usAnalyzer + = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US)); + Analyzer franceAnalyzer + = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE)); + Analyzer swedenAnalyzer + = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se"))); + Analyzer denmarkAnalyzer + = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk"))); + + // The ICU Collator and java.text.Collator implementations differ in their + // orderings - "BFJHD" is the ordering for ICU Collator for Locale.US. 
+ testCollationKeySort + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJHD"); + } } Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java =================================================================== --- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (revision 965632) +++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (working copy) @@ -26,7 +26,7 @@ import java.io.Reader; import java.util.Locale; - +@Deprecated public class TestICUCollationKeyFilter extends CollationTestBase { private Collator collator = Collator.getInstance(new Locale("fa")); Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java (revision 0) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationAttributeFactory.java (revision 0) @@ -0,0 +1,43 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.collation.tokenattributes.ICUCollatedTermAttributeImpl; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; + +import com.ibm.icu.text.Collator; + +public class ICUCollationAttributeFactory extends AttributeSource.AttributeFactory { + private final Collator collator; + + public ICUCollationAttributeFactory(Collator collator) { + this.collator = collator; + } + + @Override + public AttributeImpl createAttributeInstance( + Class<? extends Attribute> attClass) { + if (attClass.equals(CharTermAttribute.class)) + return new ICUCollatedTermAttributeImpl(collator); + return AttributeSource.AttributeFactory. + DEFAULT_ATTRIBUTE_FACTORY.createAttributeInstance(attClass); + } +} Property changes on: modules\analysis\icu\src\java\org\apache\lucene\collation\ICUCollationAttributeFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (revision 965632) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (working copy) @@ -19,13 +19,11 @@ import com.ibm.icu.text.Collator; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; +import org.apache.lucene.util.Version; import java.io.Reader; -import java.io.IOException; /** @@ -69,38 +67,31 @@ * java.text.Collator over several languages. *
*/ -public final class ICUCollationKeyAnalyzer extends Analyzer { - private Collator collator; - - public ICUCollationKeyAnalyzer(Collator collator) { +public final class ICUCollationKeyAnalyzer extends ReusableAnalyzerBase { + private final Collator collator; + private final ICUCollationAttributeFactory factory; + private final Version matchVersion; + + public ICUCollationKeyAnalyzer(Version matchVersion, Collator collator) { + this.matchVersion = matchVersion; this.collator = collator; + this.factory = new ICUCollationAttributeFactory(collator); } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new KeywordTokenizer(reader); - result = new ICUCollationKeyFilter(result, collator); - return result; - } - private class SavedStreams { - Tokenizer source; - TokenStream result; + @Deprecated + public ICUCollationKeyAnalyzer(Collator collator) { + this(Version.LUCENE_31, collator); } - + @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) - throws IOException { - - SavedStreams streams = (SavedStreams)getPreviousTokenStream(); - if (streams == null) { - streams = new SavedStreams(); - streams.source = new KeywordTokenizer(reader); - streams.result = new ICUCollationKeyFilter(streams.source, collator); - setPreviousTokenStream(streams); + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + if (matchVersion.onOrAfter(Version.LUCENE_40)) { + KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE); + return new TokenStreamComponents(tokenizer, tokenizer); } else { - streams.source.reset(reader); + KeywordTokenizer tokenizer = new KeywordTokenizer(reader); + return new TokenStreamComponents(tokenizer, new ICUCollationKeyFilter(tokenizer, collator)); } - return streams.result; } } Index: modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java 
=================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (revision 965632) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (working copy) @@ -67,6 +67,7 @@ * java.text.Collator over several languages. * */ +@Deprecated public final class ICUCollationKeyFilter extends TokenFilter { private Collator collator = null; private RawCollationKey reusableKey = new RawCollationKey(); Index: modules/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java =================================================================== --- modules/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java (revision 0) +++ modules/analysis/icu/src/java/org/apache/lucene/collation/tokenattributes/ICUCollatedTermAttributeImpl.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.collation.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.util.BytesRef; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.RawCollationKey; + +public class ICUCollatedTermAttributeImpl extends CharTermAttributeImpl { + private final Collator collator; + private RawCollationKey key = new RawCollationKey(); + + public ICUCollatedTermAttributeImpl(Collator collator) { + this.collator = collator; + } + + @Override + public int toBytesRef(BytesRef target) { + collator.getRawCollationKey(toString(), key); + target.bytes = key.bytes; + target.offset = 0; + target.length = key.size; + return target.hashCode(); + } +} Property changes on: modules\analysis\icu\src\java\org\apache\lucene\collation\tokenattributes\ICUCollatedTermAttributeImpl.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (revision 965632) +++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java (working copy) @@ -59,7 +59,8 @@ secondRangeBeginning, secondRangeEnd); } - public void testCollationKeySort() throws Exception { + @Deprecated + public void testCollationKeySortBackwards() throws Exception { Analyzer usAnalyzer = new CollationKeyAnalyzer(Collator.getInstance(Locale.US)); Analyzer franceAnalyzer @@ -74,4 +75,20 @@ testCollationKeySort (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH"); } + + public void testCollationKeySort() throws Exception { + Analyzer usAnalyzer + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US)); + Analyzer franceAnalyzer + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE)); + Analyzer 
swedenAnalyzer + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se"))); + Analyzer denmarkAnalyzer + = new CollationKeyAnalyzer(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk"))); + + // The ICU Collator and java.text.Collator implementations differ in their + // orderings - "BFJDH" is the ordering for java.text.Collator for Locale.US. + testCollationKeySort + (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, "BFJDH"); + } } Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (revision 965632) +++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (working copy) @@ -26,7 +26,7 @@ import java.util.Locale; import java.io.Reader; - +@Deprecated public class TestCollationKeyFilter extends CollationTestBase { // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in Index: modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (revision 965632) +++ modules/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (working copy) @@ -30,7 +30,7 @@ */ public final class KeywordTokenizer extends Tokenizer { - private static final int DEFAULT_BUFFER_SIZE = 256; + public static final int DEFAULT_BUFFER_SIZE = 256; private boolean done = false; private int finalOffset; Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java (revision 0) +++ 
modules/analysis/common/src/java/org/apache/lucene/collation/CollationAttributeFactory.java (revision 0) @@ -0,0 +1,43 @@ +package org.apache.lucene.collation; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.text.Collator; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.collation.tokenattributes.CollatedTermAttributeImpl; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; + +public class CollationAttributeFactory extends AttributeSource.AttributeFactory { + private final Collator collator; + + public CollationAttributeFactory(Collator collator) { + this.collator = collator; + } + + @Override + public AttributeImpl createAttributeInstance( + Class<? extends Attribute> attClass) { + if (attClass.equals(CharTermAttribute.class)) + return new CollatedTermAttributeImpl(collator); + return AttributeSource.AttributeFactory.
+ DEFAULT_ATTRIBUTE_FACTORY.createAttributeInstance(attClass); + } +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\collation\CollationAttributeFactory.java ___________________________________________________________________ Added: svn:eol-style + native Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (revision 965632) +++ modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java (working copy) @@ -18,14 +18,12 @@ */ -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.util.ReusableAnalyzerBase; +import org.apache.lucene.util.Version; import java.text.Collator; import java.io.Reader; -import java.io.IOException; /** *@@ -76,38 +74,31 @@ * ICUCollationKeyAnalyzer on the query side, or vice versa. *
*/ -public final class CollationKeyAnalyzer extends Analyzer { - private Collator collator; - - public CollationKeyAnalyzer(Collator collator) { +public final class CollationKeyAnalyzer extends ReusableAnalyzerBase { + private final Collator collator; + private final CollationAttributeFactory factory; + private final Version matchVersion; + + public CollationKeyAnalyzer(Version matchVersion, Collator collator) { + this.matchVersion = matchVersion; this.collator = collator; + this.factory = new CollationAttributeFactory(collator); } - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new KeywordTokenizer(reader); - result = new CollationKeyFilter(result, collator); - return result; - } - private class SavedStreams { - Tokenizer source; - TokenStream result; + @Deprecated + public CollationKeyAnalyzer(Collator collator) { + this(Version.LUCENE_31, collator); } - + @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) - throws IOException { - - SavedStreams streams = (SavedStreams)getPreviousTokenStream(); - if (streams == null) { - streams = new SavedStreams(); - streams.source = new KeywordTokenizer(reader); - streams.result = new CollationKeyFilter(streams.source, collator); - setPreviousTokenStream(streams); + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + if (matchVersion.onOrAfter(Version.LUCENE_40)) { + KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE); + return new TokenStreamComponents(tokenizer, tokenizer); } else { - streams.source.reset(reader); + KeywordTokenizer tokenizer = new KeywordTokenizer(reader); + return new TokenStreamComponents(tokenizer, new CollationKeyFilter(tokenizer, collator)); } - return streams.result; } } Index: modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java 
=================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java (revision 965632) +++ modules/analysis/common/src/java/org/apache/lucene/collation/CollationKeyFilter.java (working copy) @@ -72,6 +72,7 @@ * ICUCollationKeyFilter on the query side, or vice versa. * */ +@Deprecated public final class CollationKeyFilter extends TokenFilter { private final Collator collator; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); Index: modules/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java =================================================================== --- modules/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java (revision 0) +++ modules/analysis/common/src/java/org/apache/lucene/collation/tokenattributes/CollatedTermAttributeImpl.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.collation.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.text.Collator; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; +import org.apache.lucene.util.BytesRef; + +public class CollatedTermAttributeImpl extends CharTermAttributeImpl { + private final Collator collator; + + public CollatedTermAttributeImpl(Collator collator) { + this.collator = collator; + } + + @Override + public int toBytesRef(BytesRef target) { + target.bytes = collator.getCollationKey(toString()).toByteArray(); + target.offset = 0; + target.length = target.bytes.length; + return target.hashCode(); + } + +} Property changes on: modules\analysis\common\src\java\org\apache\lucene\collation\tokenattributes\CollatedTermAttributeImpl.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (revision 965632) +++ lucene/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java (working copy) @@ -21,6 +21,7 @@ import java.nio.CharBuffer; import java.nio.ByteBuffer; +@Deprecated public class TestIndexableBinaryStringTools extends LuceneTestCase { private static final int NUM_RANDOM_TESTS = 2000*_TestUtil.getRandomMultiplier(); private static final int MAX_RANDOM_BINARY_LENGTH = 300*_TestUtil.getRandomMultiplier(); Index: lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (revision 965632) +++ lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (working copy) @@ -77,7 +77,7 @@ } // *** TermToBytesRefAttribute interface *** - public final int toBytesRef(BytesRef target) { + public int toBytesRef(BytesRef target) { 
return UnicodeUtil.UTF16toUTF8WithHash(termBuffer, 0, termLength, target); } Index: lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java =================================================================== --- lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (revision 965632) +++ lucene/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java (working copy) @@ -53,6 +53,7 @@ * * @lucene.experimental */ +@Deprecated public class IndexableBinaryStringTools { private static final CodingCase[] CODING_CASES = {