Index: src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/Analyzer.java (revision 826601)
+++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy)
@@ -84,25 +84,18 @@
}
}
- protected boolean overridesTokenStreamMethod;
+ /** @deprecated Set by {@link #setOverridesTokenStreamMethod}, which is only present for back-compat. */
+ protected boolean overridesTokenStreamMethod = false;
/** @deprecated This is only present to preserve
* back-compat of classes that subclass a core analyzer
* and override tokenStream but not reusableTokenStream */
- protected void setOverridesTokenStreamMethod(Class baseClass) {
-
- final Class[] params = new Class[2];
- params[0] = String.class;
- params[1] = Reader.class;
-
+ protected void setOverridesTokenStreamMethod(Class<? extends Analyzer> baseClass) {
try {
- Method m = this.getClass().getMethod("tokenStream", params);
- if (m != null) {
- overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
- } else {
- overridesTokenStreamMethod = false;
- }
+ Method m = this.getClass().getMethod("tokenStream", String.class, Reader.class);
+ overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
} catch (NoSuchMethodException nsme) {
+ // cannot happen: the lookup is on this.getClass(), an Analyzer subclass, and Analyzer itself declares tokenStream(String, Reader)
overridesTokenStreamMethod = false;
}
}
@@ -121,8 +114,7 @@
* @param fieldName Fieldable name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
- public int getPositionIncrementGap(String fieldName)
- {
+ public int getPositionIncrementGap(String fieldName) {
return 0;
}
Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 826601)
+++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -36,86 +36,25 @@
*
*
As of 2.9, StopFilter preserves position
* increments by default
- *
As of 2.9, Tokens incorrectly identified as acronyms
+ *
As of 2.3, Tokens incorrectly identified as acronyms
* are corrected (see LUCENE-1608
*
*/
public class StandardAnalyzer extends Analyzer {
- private Set stopSet;
+ private Set<?> stopSet;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
- * This is false by default to support backward compatibility.
- *
- * @deprecated this should be removed in the next release (3.0).
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
+ * See <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>
*/
- private boolean replaceInvalidAcronym = defaultReplaceInvalidAcronym;
+ private final boolean replaceInvalidAcronym,enableStopPositionIncrements;
- private static boolean defaultReplaceInvalidAcronym;
- private boolean enableStopPositionIncrements;
-
- // @deprecated
- private boolean useDefaultStopPositionIncrements;
-
- // Default to true (fixed the bug), unless the system prop is set
- static {
- final String v = System.getProperty("org.apache.lucene.analysis.standard.StandardAnalyzer.replaceInvalidAcronym");
- if (v == null || v.equals("true"))
- defaultReplaceInvalidAcronym = true;
- else
- defaultReplaceInvalidAcronym = false;
- }
-
- /**
- *
- * @return true if new instances of StandardTokenizer will
- * replace mischaracterized acronyms
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- * @deprecated This will be removed (hardwired to true) in 3.0
- */
- public static boolean getDefaultReplaceInvalidAcronym() {
- return defaultReplaceInvalidAcronym;
- }
-
- /**
- *
- * @param replaceInvalidAcronym Set to true to have new
- * instances of StandardTokenizer replace mischaracterized
- * acronyms by default. Set to false to preserve the
- * previous (before 2.4) buggy behavior. Alternatively,
- * set the system property
- * org.apache.lucene.analysis.standard.StandardAnalyzer.replaceInvalidAcronym
- * to false.
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- * @deprecated This will be removed (hardwired to true) in 3.0
- */
- public static void setDefaultReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
- defaultReplaceInvalidAcronym = replaceInvalidAcronym;
- }
-
-
- /** An array containing some common English words that are usually not
- useful for searching.
- @deprecated Use {@link #STOP_WORDS_SET} instead */
- public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
-
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
- public static final Set/**/ STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+ public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
- * @deprecated Use {@link #StandardAnalyzer(Version)} instead. */
- public StandardAnalyzer() {
- this(Version.LUCENE_24, STOP_WORDS_SET);
- }
-
- /** Builds an analyzer with the default stop words ({@link
- * #STOP_WORDS}).
* @param matchVersion Lucene version to match See {@link
* above}
*/
@@ -124,141 +63,34 @@
}
/** Builds an analyzer with the given stop words.
- * @deprecated Use {@link #StandardAnalyzer(Version, Set)}
- * instead */
- public StandardAnalyzer(Set stopWords) {
- this(Version.LUCENE_24, stopWords);
- }
-
- /** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match See {@link
* above}
* @param stopWords stop words */
- public StandardAnalyzer(Version matchVersion, Set stopWords) {
+ public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
stopSet = stopWords;
- init(matchVersion);
+ setOverridesTokenStreamMethod(StandardAnalyzer.class);
+ enableStopPositionIncrements = matchVersion.onOrAfter(Version.LUCENE_29);
+ replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_23);
}
- /** Builds an analyzer with the given stop words.
- * @deprecated Use {@link #StandardAnalyzer(Version, Set)} instead */
- public StandardAnalyzer(String[] stopWords) {
- this(Version.LUCENE_24, StopFilter.makeStopSet(stopWords));
- }
-
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
- * @deprecated Use {@link #StandardAnalyzer(Version, File)}
- * instead
- */
- public StandardAnalyzer(File stopwords) throws IOException {
- this(Version.LUCENE_24, stopwords);
- }
-
- /** Builds an analyzer with the stop words from the given file.
- * @see WordlistLoader#getWordSet(File)
* @param matchVersion Lucene version to match See {@link
* above}
* @param stopwords File to read stop words from */
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
- stopSet = WordlistLoader.getWordSet(stopwords);
- init(matchVersion);
+ this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
- * @deprecated Use {@link #StandardAnalyzer(Version, Reader)}
- * instead
- */
- public StandardAnalyzer(Reader stopwords) throws IOException {
- this(Version.LUCENE_24, stopwords);
- }
-
- /** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
* @param matchVersion Lucene version to match See {@link
* above}
* @param stopwords Reader to read stop words from */
public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
- stopSet = WordlistLoader.getWordSet(stopwords);
- init(matchVersion);
+ this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
- /**
- *
- * @param replaceInvalidAcronym Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- public StandardAnalyzer(boolean replaceInvalidAcronym) {
- this(Version.LUCENE_24, STOP_WORDS_SET);
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
- }
-
- /**
- * @param stopwords The stopwords to use
- * @param replaceInvalidAcronym Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(Version.LUCENE_24, stopwords);
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- /**
- * @param stopwords The stopwords to use
- * @param replaceInvalidAcronym Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(Version.LUCENE_24, stopwords);
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- /**
- *
- * @param stopwords The stopwords to use
- * @param replaceInvalidAcronym Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(Version.LUCENE_24, StopFilter.makeStopSet(stopwords));
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- /**
- * @param stopwords The stopwords to use
- * @param replaceInvalidAcronym Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- *
- * @deprecated Remove in 3.X and make true the only valid value
- */
- public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
- this(Version.LUCENE_24, stopwords);
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- private final void init(Version matchVersion) {
- setOverridesTokenStreamMethod(StandardAnalyzer.class);
- if (matchVersion.onOrAfter(Version.LUCENE_29)) {
- enableStopPositionIncrements = true;
- } else {
- useDefaultStopPositionIncrements = true;
- }
- }
-
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
public TokenStream tokenStream(String fieldName, Reader reader) {
@@ -266,11 +98,7 @@
tokenStream.setMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(result);
- if (useDefaultStopPositionIncrements) {
- result = new StopFilter(result, stopSet);
- } else {
- result = new StopFilter(enableStopPositionIncrements, result, stopSet);
- }
+ result = new StopFilter(enableStopPositionIncrements, result, stopSet);
return result;
}
@@ -301,7 +129,6 @@
return maxTokenLength;
}
- /** @deprecated Use {@link #tokenStream} instead */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
@@ -316,11 +143,7 @@
streams.tokenStream = new StandardTokenizer(reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
- if (useDefaultStopPositionIncrements) {
- streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
- } else {
- streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
- }
+ streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
} else {
streams.tokenStream.reset(reader);
}
@@ -330,26 +153,4 @@
return streams.filteredTokenStream;
}
-
- /**
- *
- * @return true if this Analyzer is replacing mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- * @deprecated This will be removed (hardwired to true) in 3.0
- */
- public boolean isReplaceInvalidAcronym() {
- return replaceInvalidAcronym;
- }
-
- /**
- *
- * @param replaceInvalidAcronym Set to true if this Analyzer is replacing mischaracterized acronyms in the StandardTokenizer
- *
- * See https://issues.apache.org/jira/browse/LUCENE-1068
- * @deprecated This will be removed (hardwired to true) in 3.0
- */
- public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
}
Index: src/java/org/apache/lucene/analysis/StopAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/StopAnalyzer.java (revision 826601)
+++ src/java/org/apache/lucene/analysis/StopAnalyzer.java (working copy)
@@ -22,147 +22,72 @@
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
+import java.util.List;
/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. */
public final class StopAnalyzer extends Analyzer {
- private final Set/**/ stopWords;
- // @deprecated
- private final boolean useDefaultStopPositionIncrement;
+ private final Set<?> stopWords;
private final boolean enablePositionIncrements;
-
- /** An array containing some common English words that are not usually useful
- for searching.
- @deprecated Use {@link #ENGLISH_STOP_WORDS_SET} instead */
- public static final String[] ENGLISH_STOP_WORDS = {
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with"
- };
/** An unmodifiable set containing some common English words that are not usually useful
for searching.*/
- public static final Set/**/ ENGLISH_STOP_WORDS_SET;
+ public static final Set<?> ENGLISH_STOP_WORDS_SET;
static {
- final String[] stopWords = new String[]{
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with"
- };
- final CharArraySet stopSet = new CharArraySet(stopWords.length, false);
- stopSet.addAll(Arrays.asList(stopWords));
- ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
+ final List<String> stopWords = Arrays.asList(
+ "a", "an", "and", "are", "as", "at", "be", "but", "by",
+ "for", "if", "in", "into", "is", "it",
+ "no", "not", "of", "on", "or", "such",
+ "that", "the", "their", "then", "there", "these",
+ "they", "this", "to", "was", "will", "with"
+ );
+ final CharArraySet stopSet = new CharArraySet(stopWords.size(), false);
+ stopSet.addAll(stopWords);
+ ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
}
/** Builds an analyzer which removes words in
- * ENGLISH_STOP_WORDS.
- * @deprecated Use {@link #StopAnalyzer(boolean)} instead */
- public StopAnalyzer() {
- stopWords = ENGLISH_STOP_WORDS_SET;
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /** Builds an analyzer which removes words in
- * ENGLISH_STOP_WORDS.
+ * {@link #ENGLISH_STOP_WORDS_SET}.
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(boolean enablePositionIncrements) {
stopWords = ENGLISH_STOP_WORDS_SET;
this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
}
/** Builds an analyzer with the stop words from the given set.
- * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */
- public StopAnalyzer(Set stopWords) {
- this.stopWords = stopWords;
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /** Builds an analyzer with the stop words from the given set.
* @param stopWords Set of stop words
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(Set stopWords, boolean enablePositionIncrements) {
this.stopWords = stopWords;
this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
}
- /** Builds an analyzer which removes words in the provided array.
- * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */
- public StopAnalyzer(String[] stopWords) {
- this.stopWords = StopFilter.makeStopSet(stopWords);
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /** Builds an analyzer which removes words in the provided array.
- * @param stopWords Array of stop words
- * @param enablePositionIncrements See {@link
- * StopFilter#setEnablePositionIncrements}
- * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead*/
- public StopAnalyzer(String[] stopWords, boolean enablePositionIncrements) {
- this.stopWords = StopFilter.makeStopSet(stopWords);
- this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
- }
-
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
- * @deprecated Use {@link #StopAnalyzer(File, boolean)} instead */
- public StopAnalyzer(File stopwordsFile) throws IOException {
- stopWords = WordlistLoader.getWordSet(stopwordsFile);
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /** Builds an analyzer with the stop words from the given file.
- * @see WordlistLoader#getWordSet(File)
* @param stopwordsFile File to load stop words from
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwordsFile);
this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
- * @deprecated Use {@link #StopAnalyzer(Reader, boolean)} instead
- */
- public StopAnalyzer(Reader stopwords) throws IOException {
- stopWords = WordlistLoader.getWordSet(stopwords);
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
* @param stopwords Reader to load stop words from
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwords);
this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
}
/** Filters LowerCaseTokenizer with StopFilter. */
public TokenStream tokenStream(String fieldName, Reader reader) {
- if (useDefaultStopPositionIncrement) {
- return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
- } else {
- return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
- }
+ return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
}
/** Filters LowerCaseTokenizer with StopFilter. */
@@ -175,11 +100,7 @@
if (streams == null) {
streams = new SavedStreams();
streams.source = new LowerCaseTokenizer(reader);
- if (useDefaultStopPositionIncrement) {
- streams.result = new StopFilter(streams.source, stopWords);
- } else {
- streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
- }
+ streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
setPreviousTokenStream(streams);
} else
streams.source.reset(reader);
Index: src/java/org/apache/lucene/analysis/StopFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/StopFilter.java (revision 826601)
+++ src/java/org/apache/lucene/analysis/StopFilter.java (working copy)
@@ -32,64 +32,14 @@
public final class StopFilter extends TokenFilter {
- // deprecated
- private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
-
private final CharArraySet stopWords;
- private boolean enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;
+ private boolean enablePositionIncrements = false;
private TermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
-
- /**
- * Construct a token stream filtering the given input.
- * @deprecated Use {@link #StopFilter(boolean, TokenStream, String[])} instead
- */
- public StopFilter(TokenStream input, String [] stopWords)
- {
- this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false);
- }
/**
* Construct a token stream filtering the given input.
- * @param enablePositionIncrements true if token positions should record the removed stop words
- * @param input input TokenStream
- * @param stopWords array of stop words
- * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set)} instead.
- */
- public StopFilter(boolean enablePositionIncrements, TokenStream input, String [] stopWords)
- {
- this(enablePositionIncrements, input, stopWords, false);
- }
-
- /**
- * Constructs a filter which removes words from the input
- * TokenStream that are named in the array of words.
- * @deprecated Use {@link #StopFilter(boolean, TokenStream, String[], boolean)} instead
- */
- public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) {
- this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, ignoreCase);
- }
-
- /**
- * Constructs a filter which removes words from the input
- * TokenStream that are named in the array of words.
- * @param enablePositionIncrements true if token positions should record the removed stop words
- * @param in input TokenStream
- * @param stopWords array of stop words
- * @param ignoreCase true if case is ignored
- * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead.
- */
- public StopFilter(boolean enablePositionIncrements, TokenStream in, String[] stopWords, boolean ignoreCase) {
- super(in);
- this.stopWords = (CharArraySet)makeStopSet(stopWords, ignoreCase);
- this.enablePositionIncrements = enablePositionIncrements;
- init();
- }
-
-
- /**
- * Construct a token stream filtering the given input.
* If stopWords is an instance of {@link CharArraySet} (true if
* makeStopSet() was used to construct the set) it will be directly used
* and ignoreCase will be ignored since CharArraySet
@@ -99,33 +49,13 @@
* a new CharArraySet will be constructed and ignoreCase will be
* used to specify the case sensitivity of that set.
*
- * @param input
- * @param stopWords The set of Stop Words.
- * @param ignoreCase -Ignore case when stopping.
- * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead
- */
- public StopFilter(TokenStream input, Set stopWords, boolean ignoreCase)
- {
- this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase);
- }
-
- /**
- * Construct a token stream filtering the given input.
- * If stopWords is an instance of {@link CharArraySet} (true if
- * makeStopSet() was used to construct the set) it will be directly used
- * and ignoreCase will be ignored since CharArraySet
- * directly controls case sensitivity.
- *
- * If stopWords is not an instance of {@link CharArraySet},
- * a new CharArraySet will be constructed and ignoreCase will be
- * used to specify the case sensitivity of that set.
- *
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param input Input TokenStream
- * @param stopWords The set of Stop Words.
+ * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
+ * @param ignoreCase if true, all words are lower cased first
* @param ignoreCase -Ignore case when stopping.
*/
- public StopFilter(boolean enablePositionIncrements, TokenStream input, Set stopWords, boolean ignoreCase)
+ public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
{
super(input);
if (stopWords instanceof CharArraySet) {
@@ -135,37 +65,22 @@
this.stopWords.addAll(stopWords);
}
this.enablePositionIncrements = enablePositionIncrements;
- init();
+ termAtt = addAttribute(TermAttribute.class);
+ posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
/**
* Constructs a filter which removes words from the input
* TokenStream that are named in the Set.
*
- * @see #makeStopSet(java.lang.String[])
- * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set)} instead
- */
- public StopFilter(TokenStream in, Set stopWords) {
- this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, false);
- }
-
- /**
- * Constructs a filter which removes words from the input
- * TokenStream that are named in the Set.
- *
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param in Input stream
- * @param stopWords The set of Stop Words.
+ * @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
* @see #makeStopSet(java.lang.String[])
*/
- public StopFilter(boolean enablePositionIncrements, TokenStream in, Set stopWords) {
+ public StopFilter(boolean enablePositionIncrements, TokenStream in, Set<?> stopWords) {
this(enablePositionIncrements, in, stopWords, false);
}
-
- public void init() {
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- }
/**
* Builds a Set from an array of stop words,
@@ -175,7 +90,7 @@
*
* @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
*/
- public static final Set makeStopSet(String[] stopWords) {
+ public static final Set