Index: modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (revision 1188604)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (working copy)
@@ -48,7 +48,7 @@
* have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
* For optional performance (as this filter does lots of lookups to the dictionary,
* you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the constructors, they will be automatically
* transformed to case-insensitive!
*/
public abstract class CompoundWordTokenFilterBase extends TokenFilter {
@@ -103,34 +103,7 @@
this.dictionary = new CharArraySet(matchVersion, dictionary, true);
}
}
-
- /** @deprecated Use the constructors taking {@link Set} */
- @Deprecated
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary) {
- this(matchVersion, input,makeDictionary(matchVersion,dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, false);
- }
-
- /** @deprecated Use the constructors taking {@link Set} */
- @Deprecated
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- this(matchVersion, input,makeDictionary(matchVersion,dictionary),minWordSize,minSubwordSize,maxSubwordSize, onlyLongestMatch);
- }
- /** @deprecated Use the constructors taking {@link Set} */
- @Deprecated
- protected CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, String[] dictionary, boolean onlyLongestMatch) {
- this(matchVersion, input,makeDictionary(matchVersion,dictionary),DEFAULT_MIN_WORD_SIZE,DEFAULT_MIN_SUBWORD_SIZE,DEFAULT_MAX_SUBWORD_SIZE, onlyLongestMatch);
- }
-
- /** @deprecated Only available for backwards compatibility. */
- @Deprecated
- public static CharArraySet makeDictionary(final Version matchVersion, final String[] dictionary) {
- if (dictionary == null) {
- return null;
- }
- return new CharArraySet(matchVersion, Arrays.asList(dictionary), true);
- }
-
@Override
public final boolean incrementToken() throws IOException {
if (!tokens.isEmpty()) {
@@ -190,5 +163,5 @@
this.endOffset = newStart + length;
}
- }
+ }
}
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java (revision 1188604)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java (working copy)
@@ -43,56 +43,10 @@
* have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
* For optional performance (as this filter does lots of lookups to the dictionary,
* you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the constructors, they will be automatically
* transformed to case-insensitive!
*/
public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
- /**
- * Creates a new {@link DictionaryCompoundWordTokenFilter}.
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
- * @param input
- * the {@link TokenStream} to process
- * @param dictionary
- * the word dictionary to match against
- * @param minWordSize
- * only words longer than this get processed
- * @param minSubwordSize
- * only subwords longer than this get to the output stream
- * @param maxSubwordSize
- * only subwords shorter than this get to the output stream
- * @param onlyLongestMatch
- * Add only the longest matching subword to the stream
- * @deprecated Use the constructors taking {@link Set}
- */
- @Deprecated
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary,
- int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
- }
-
- /**
- * Creates a new {@link DictionaryCompoundWordTokenFilter}
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
- *
- * @param input
- * the {@link TokenStream} to process
- * @param dictionary
- * the word dictionary to match against
- * @deprecated Use the constructors taking {@link Set}
- */
- @Deprecated
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, String[] dictionary) {
- super(matchVersion, input, dictionary);
- }
/**
* Creates a new {@link DictionaryCompoundWordTokenFilter}
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (revision 1188604)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (working copy)
@@ -46,71 +46,14 @@
* have {@link org.apache.lucene.analysis.core.LowerCaseFilter} before this filter in your analysis chain.
* For optional performance (as this filter does lots of lookups to the dictionary,
* you should use the latter analysis chain/CharArraySet). Be aware: If you supply arbitrary
- * {@link Set Sets} to the ctors or {@code String[]} dictionaries, they will be automatically
+ * {@link Set Sets} to the constructors, they will be automatically
* transformed to case-insensitive!
*/
public class HyphenationCompoundWordTokenFilter extends
CompoundWordTokenFilterBase {
private HyphenationTree hyphenator;
-
- /**
- * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
- * @param input
- * the {@link TokenStream} to process
- * @param hyphenator
- * the hyphenation pattern tree to use for hyphenation
- * @param dictionary
- * the word dictionary to match against
- * @param minWordSize
- * only words longer than this get processed
- * @param minSubwordSize
- * only subwords longer than this get to the output stream
- * @param maxSubwordSize
- * only subwords shorter than this get to the output stream
- * @param onlyLongestMatch
- * Add only the longest matching subword to the stream
- * @deprecated Use the constructors taking {@link Set}
- */
- @Deprecated
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, String[] dictionary, int minWordSize,
- int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
- super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
- onlyLongestMatch);
- this.hyphenator = hyphenator;
- }
-
/**
- * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
- *
- * @param matchVersion
- * Lucene version to enable correct Unicode 4.0 behavior in the
- * dictionaries if Version > 3.0. See CompoundWordTokenFilterBase for details.
- * @param input
- * the {@link TokenStream} to process
- * @param hyphenator
- * the hyphenation pattern tree to use for hyphenation
- * @param dictionary
- * the word dictionary to match against
- * @deprecated Use the constructors taking {@link Set}
- */
- @Deprecated
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
- HyphenationTree hyphenator, String[] dictionary) {
- this(matchVersion, input, hyphenator, makeDictionary(matchVersion,dictionary), DEFAULT_MIN_WORD_SIZE,
- DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
- }
-
- /**
* Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
*
* @param matchVersion
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (revision 1188604)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (working copy)
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.io.StringReader;
+import java.util.Arrays;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -27,14 +28,20 @@
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.xml.sax.InputSource;
public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
+
+ private static CharArraySet makeDictionary(String... dictionary) { // test-local stand-in for the removed CompoundWordTokenFilterBase.makeDictionary(Version, String[])
+ return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true); // same construction the removed helper used (third argument true), but without its null check
+ }
+
public void testHyphenationCompoundWordsDA() throws Exception {
- String[] dict = { "læse", "hest" };
+ CharArraySet dict = makeDictionary("læse", "hest");
InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
@@ -53,7 +60,7 @@
}
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
- String[] dict = { "basketball", "basket", "ball", "kurv" };
+ CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
InputSource is = new InputSource(getClass().getResource("da_UTF8.xml").toExternalForm());
HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter
@@ -121,9 +128,9 @@
}
public void testDumbCompoundWordsSE() throws Exception {
- String[] dict = { "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
+ CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
"Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiol", "Makare", "Gesäll",
- "Sko", "Vind", "Rute", "Torkare", "Blad" };
+ "Sko", "Vind", "Rute", "Torkare", "Blad");
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new MockTokenizer(
@@ -151,9 +158,9 @@
}
public void testDumbCompoundWordsSELongestMatch() throws Exception {
- String[] dict = { "Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
+ CharArraySet dict = makeDictionary("Bil", "Dörr", "Motor", "Tak", "Borr", "Slag", "Hammar",
"Pelar", "Glas", "Ögon", "Fodral", "Bas", "Fiols", "Makare", "Gesäll",
- "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral" };
+ "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new MockTokenizer(new StringReader("Basfiolsfodralmakaregesäll"), MockTokenizer.WHITESPACE, false),
@@ -168,7 +175,7 @@
}
public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
- String[] dict = {"ab", "cd", "ef"};
+ CharArraySet dict = makeDictionary("ab", "cd", "ef");
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -189,7 +196,7 @@
}
public void testWordComponentWithLessThanMinimumLength() throws Exception {
- String[] dict = {"abc", "d", "efg"};
+ CharArraySet dict = makeDictionary("abc", "d", "efg");
DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
@@ -211,8 +218,8 @@
}
public void testReset() throws Exception {
- String[] dict = { "Rind", "Fleisch", "Draht", "Schere", "Gesetz",
- "Aufgabe", "Überwachung" };
+ CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
+ "Aufgabe", "Überwachung");
Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Rindfleischüberwachungsgesetz"));
@@ -234,7 +241,7 @@
}
public void testRetainMockAttribute() throws Exception {
- String[] dict = { "abc", "d", "efg" };
+ CharArraySet dict = makeDictionary("abc", "d", "efg");
Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("abcdefg"));
TokenStream stream = new MockRetainAttributeFilter(tokenizer);