Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 778214)
+++ CHANGES.txt (working copy)
@@ -138,6 +138,10 @@
14. LUCENE-1636: Make TokenFilter.input final so it's set only
once. (Wouter Heijke, Uwe Schindler via Mike McCandless).
+15. LUCENE-1660: StopFilter, StandardAnalyzer, StopAnalyzer now
+ require up front specification of enablePositionIncrement (Mike
+ McCandless)
+
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
Index: src/test/org/apache/lucene/analysis/TestStopFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestStopFilter.java (revision 778214)
+++ src/test/org/apache/lucene/analysis/TestStopFilter.java (working copy)
@@ -36,7 +36,7 @@
public void testExactCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
- TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords);
+ TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
@@ -48,7 +48,7 @@
public void testIgnoreCase() throws IOException {
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
- TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopWords, true);
+ TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopWords, true);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
@@ -59,7 +59,7 @@
StringReader reader = new StringReader("Now is The Time");
String[] stopWords = new String[] { "is", "the", "Time" };
Set stopSet = StopFilter.makeStopSet(stopWords);
- TokenStream stream = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+ TokenStream stream = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
final TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
@@ -85,11 +85,11 @@
Set stopSet = StopFilter.makeStopSet(stopWords);
// with increments
StringReader reader = new StringReader(sb.toString());
- StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+ StopFilter stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
doTestStopPositons(stpf,true);
// without increments
reader = new StringReader(sb.toString());
- stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+ stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
doTestStopPositons(stpf,false);
// with increments, concatenating two stop filters
ArrayList a0 = new ArrayList();
@@ -108,9 +108,9 @@
Set stopSet0 = StopFilter.makeStopSet(stopWords0);
Set stopSet1 = StopFilter.makeStopSet(stopWords1);
reader = new StringReader(sb.toString());
- StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
+ StopFilter stpf0 = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet0); // first part of the set
stpf0.setEnablePositionIncrements(true);
- StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
+ StopFilter stpf01 = new StopFilter(false, stpf0, stopSet1); // two stop filters concatenated!
doTestStopPositons(stpf01,true);
}
Index: src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
--- src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 778214)
+++ src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy)
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
@@ -158,17 +157,9 @@
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
- // analyzer to introduce stopwords and increment gaps
- Analyzer stpa = new Analyzer() {
- final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream ts = a.tokenStream(fieldName,reader);
- return new StopFilter(ts,new String[]{"stop"});
- }
- };
-
// should not find "1 2" because there is a gap of 1 in the index
- QueryParser qp = new QueryParser("field",stpa);
+ QueryParser qp = new QueryParser("field",
+ new StopWhitespaceAnalyzer(false));
q = (PhraseQuery) qp.parse("\"1 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
@@ -184,22 +175,30 @@
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
- boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
- try {
- // stop filter alone won't help, because query parser swallows the increments.
- qp.setEnablePositionIncrements(false);
- StopFilter.setEnablePositionIncrementsDefault(true);
- q = (PhraseQuery) qp.parse("\"1 stop 2\"");
- hits = searcher.search(q, null, 1000).scoreDocs;
- assertEquals(0, hits.length);
+ // stop filter alone won't help, because query parser swallows the increments.
+ qp.setEnablePositionIncrements(false);
+ q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+ hits = searcher.search(q, null, 1000).scoreDocs;
+ assertEquals(0, hits.length);
- // when both qp qnd stopFilter propagate increments, we should find the doc.
- qp.setEnablePositionIncrements(true);
- q = (PhraseQuery) qp.parse("\"1 stop 2\"");
- hits = searcher.search(q, null, 1000).scoreDocs;
- assertEquals(1, hits.length);
- } finally {
- StopFilter.setEnablePositionIncrementsDefault(dflt);
+ // when both qp qnd stopFilter propagate increments, we should find the doc.
+ qp = new QueryParser("field",
+ new StopWhitespaceAnalyzer(true));
+ qp.setEnablePositionIncrements(true);
+ q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+ hits = searcher.search(q, null, 1000).scoreDocs;
+ assertEquals(1, hits.length);
+ }
+
+ private static class StopWhitespaceAnalyzer extends Analyzer {
+ boolean enablePositionIncrements;
+ final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
+ public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
+ this.enablePositionIncrements = enablePositionIncrements;
}
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ TokenStream ts = a.tokenStream(fieldName,reader);
+ return new StopFilter(enablePositionIncrements, ts, new String[]{"stop"});
+ }
}
}
Index: src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 778214)
+++ src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -44,6 +44,8 @@
private boolean replaceInvalidAcronym = defaultReplaceInvalidAcronym;
private static boolean defaultReplaceInvalidAcronym;
+ private boolean enableStopPositionIncrements;
+ private boolean useDefaultStopPositionIncrements;
// Default to true (fixed the bug), unless the system prop is set
static {
@@ -88,35 +90,91 @@
useful for searching. */
public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
- /** Builds an analyzer with the default stop words ({@link #STOP_WORDS}). */
+ /** Builds an analyzer with the default stop words ({@link
+ * #STOP_WORDS}).
+ * @deprecated Use {@link #StandardAnalyzer(boolean, String[])},
+ * passing in null for the stop words, instead */
public StandardAnalyzer() {
this(STOP_WORDS);
}
- /** Builds an analyzer with the given stop words. */
+ /** Builds an analyzer with the given stop words.
+ * @deprecated Use {@link #StandardAnalyzer(boolean, Set)}
+ * instead */
public StandardAnalyzer(Set stopWords) {
stopSet = stopWords;
+ useDefaultStopPositionIncrements = true;
}
- /** Builds an analyzer with the given stop words. */
+ /** Builds an analyzer with the given stop words.
+ * @param enableStopPositionIncrements See {@link
+ * StopFilter#setEnablePositionIncrements}
+ * @param stopWords stop words */
+ public StandardAnalyzer(boolean enableStopPositionIncrements, Set stopWords) {
+ stopSet = stopWords;
+ this.enableStopPositionIncrements = enableStopPositionIncrements;
+ }
+
+ /** Builds an analyzer with the given stop words.
+ * @deprecated Use {@link #StandardAnalyzer(boolean,
+ * String[])} instead */
public StandardAnalyzer(String[] stopWords) {
+ if (stopWords == null) {
+ stopWords = STOP_WORDS;
+ }
stopSet = StopFilter.makeStopSet(stopWords);
+ useDefaultStopPositionIncrements = true;
}
+ /** Builds an analyzer with the given stop words.
+ * @param enableStopPositionIncrements See {@link
+ * StopFilter#setEnablePositionIncrements}
+ * @param stopWords Array of stop words */
+ public StandardAnalyzer(boolean enableStopPositionIncrements, String[] stopWords) {
+ stopSet = StopFilter.makeStopSet(stopWords);
+ this.enableStopPositionIncrements = enableStopPositionIncrements;
+ }
+
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
+ * @deprecated Use {@link #StandardAnalyzer(boolean, File)}
+ * instead
*/
public StandardAnalyzer(File stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
+ useDefaultStopPositionIncrements = true;
}
+ /** Builds an analyzer with the stop words from the given file.
+ * @see WordlistLoader#getWordSet(File)
+ * @param enableStopPositionIncrements See {@link
+ * StopFilter#setEnablePositionIncrements}
+ * @param stopwords File to read stop words from */
+ public StandardAnalyzer(boolean enableStopPositionIncrements, File stopwords) throws IOException {
+ stopSet = WordlistLoader.getWordSet(stopwords);
+ this.enableStopPositionIncrements = enableStopPositionIncrements;
+ }
+
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
+ * @deprecated Use {@link #StandardAnalyzer(boolean, Reader)}
+ * instead
*/
public StandardAnalyzer(Reader stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
+ useDefaultStopPositionIncrements = true;
}
+ /** Builds an analyzer with the stop words from the given reader.
+ * @see WordlistLoader#getWordSet(Reader)
+ * @param enableStopPositionIncrements See {@link
+ * StopFilter#setEnablePositionIncrements}
+ * @param stopwords Reader to read stop words from */
+ public StandardAnalyzer(boolean enableStopPositionIncrements, Reader stopwords) throws IOException {
+ stopSet = WordlistLoader.getWordSet(stopwords);
+ this.enableStopPositionIncrements = enableStopPositionIncrements;
+ }
+
/**
*
* @param replaceInvalidAcronym Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
@@ -128,6 +186,7 @@
public StandardAnalyzer(boolean replaceInvalidAcronym) {
this(STOP_WORDS);
this.replaceInvalidAcronym = replaceInvalidAcronym;
+ useDefaultStopPositionIncrements = true;
}
/**
@@ -141,6 +200,7 @@
public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
+ useDefaultStopPositionIncrements = true;
}
/**
@@ -154,6 +214,7 @@
public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
+ useDefaultStopPositionIncrements = true;
}
/**
@@ -168,6 +229,7 @@
public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
+ useDefaultStopPositionIncrements = true;
}
/**
@@ -181,6 +243,7 @@
public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
this(stopwords);
this.replaceInvalidAcronym = replaceInvalidAcronym;
+ useDefaultStopPositionIncrements = true;
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
@@ -190,7 +253,11 @@
tokenStream.setMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(result);
- result = new StopFilter(result, stopSet);
+ if (useDefaultStopPositionIncrements) {
+ result = new StopFilter(result, stopSet);
+ } else {
+ result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+ }
return result;
}
@@ -229,7 +296,11 @@
streams.tokenStream = new StandardTokenizer(reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
- streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
+ if (useDefaultStopPositionIncrements) {
+ streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
+ } else {
+ streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
+ }
} else {
streams.tokenStream.reset(reader);
}
Index: src/java/org/apache/lucene/analysis/StopFilter.java
===================================================================
--- src/java/org/apache/lucene/analysis/StopFilter.java (revision 778214)
+++ src/java/org/apache/lucene/analysis/StopFilter.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.queryParser.QueryParser; // for javadoc
/**
* Removes stop words from a token stream.
@@ -31,6 +32,7 @@
public final class StopFilter extends TokenFilter {
+ // deprecated
private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
private final CharArraySet stopWords;
@@ -41,19 +43,45 @@
/**
* Construct a token stream filtering the given input.
+ * @deprecated Use {@link #StopFilter(boolean, TokenStream, String[])} instead
*/
public StopFilter(TokenStream input, String [] stopWords)
{
- this(input, stopWords, false);
+ this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false);
}
/**
+ * Construct a token stream filtering the given input.
+ * @param enablePositionIncrements true if token positions should record the removed stop words
+ * @param input input TokenStream
+ * @param stopWords array of stop words
+ */
+ public StopFilter(boolean enablePositionIncrements, TokenStream input, String [] stopWords)
+ {
+ this(enablePositionIncrements, input, stopWords, false);
+ }
+
+ /**
* Constructs a filter which removes words from the input
* TokenStream that are named in the array of words.
+ * @deprecated Use {@link #StopFilter(boolean, TokenStream, String[], boolean)} instead
*/
public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) {
+ this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, ignoreCase);
+ }
+
+ /**
+ * Constructs a filter which removes words from the input
+ * TokenStream that are named in the array of words.
+ * @param enablePositionIncrements true if token positions should record the removed stop words
+ * @param in input TokenStream
+ * @param stopWords array of stop words
+ * @param ignoreCase true if case is ignored
+ */
+ public StopFilter(boolean enablePositionIncrements, TokenStream in, String[] stopWords, boolean ignoreCase) {
super(in);
this.stopWords = (CharArraySet)makeStopSet(stopWords, ignoreCase);
+ this.enablePositionIncrements = enablePositionIncrements;
init();
}
@@ -72,9 +100,31 @@
* @param input
* @param stopWords The set of Stop Words.
* @param ignoreCase -Ignore case when stopping.
+ * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set, boolean)} instead
*/
public StopFilter(TokenStream input, Set stopWords, boolean ignoreCase)
{
+ this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase);
+ }
+
+ /**
+ * Construct a token stream filtering the given input.
+ * If stopWords is an instance of {@link CharArraySet} (true if
+ * makeStopSet() was used to construct the set) it will be directly used
+ * and ignoreCase will be ignored since CharArraySet
+ * directly controls case sensitivity.
+ *
stopWords is not an instance of {@link CharArraySet},
+ * a new CharArraySet will be constructed and ignoreCase will be
+ * used to specify the case sensitivity of that set.
+ *
+ * @param enablePositionIncrements true if token positions should record the removed stop words
+ * @param input Input TokenStream
+ * @param stopWords The set of Stop Words.
+ * @param ignoreCase -Ignore case when stopping.
+ */
+ public StopFilter(boolean enablePositionIncrements, TokenStream input, Set stopWords, boolean ignoreCase)
+ {
super(input);
if (stopWords instanceof CharArraySet) {
this.stopWords = (CharArraySet)stopWords;
@@ -82,6 +132,7 @@
this.stopWords = new CharArraySet(stopWords.size(), ignoreCase);
this.stopWords.addAll(stopWords);
}
+ this.enablePositionIncrements = enablePositionIncrements;
init();
}
@@ -90,10 +141,24 @@
* TokenStream that are named in the Set.
*
* @see #makeStopSet(java.lang.String[])
+ * @deprecated Use {@link #StopFilter(boolean, TokenStream, Set)} instead
*/
public StopFilter(TokenStream in, Set stopWords) {
- this(in, stopWords, false);
+ this(ENABLE_POSITION_INCREMENTS_DEFAULT, in, stopWords, false);
}
+
+ /**
+ * Constructs a filter which removes words from the input
+ * TokenStream that are named in the Set.
+ *
+ * @param enablePositionIncrements true if token positions should record the removed stop words
+ * @param in Input stream
+ * @param stopWords The set of Stop Words.
+ * @see #makeStopSet(java.lang.String[])
+ */
+ public StopFilter(boolean enablePositionIncrements, TokenStream in, Set stopWords) {
+ this(enablePositionIncrements, in, stopWords, false);
+ }
public void init() {
termAtt = (TermAttribute) addAttribute(TermAttribute.class);
@@ -190,6 +255,7 @@
/**
* @see #setEnablePositionIncrementsDefault(boolean).
+ * @deprecated Please specify this when you create the StopFilter
*/
public static boolean getEnablePositionIncrementsDefault() {
return ENABLE_POSITION_INCREMENTS_DEFAULT;
@@ -205,6 +271,7 @@
*
* Default : false.
* @see #setEnablePositionIncrements(boolean).
+ * @deprecated Please specify this when you create the StopFilter
*/
public static void setEnablePositionIncrementsDefault(boolean defaultValue) {
ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
@@ -218,12 +285,17 @@
}
/**
- * Set to true to make this StopFilter enable position increments to result tokens.
+ * Set to true to make this StopFilter
+ * enable position increments to result tokens.
+ * Generally true is best as it does not lose information
+ * (positions of the original tokens) during indexing.
*
* When set, when a token is stopped (omitted), the position increment of * the following token is incremented. *
- * Default: see {@link #setEnablePositionIncrementsDefault(boolean)}. + * NOTE: be sure to also set {@link + * QueryParser#setEnablePositionIncrements} if you use + * QueryParser to create queries. */ public void setEnablePositionIncrements(boolean enable) { this.enablePositionIncrements = enable; Index: src/java/org/apache/lucene/analysis/StopAnalyzer.java =================================================================== --- src/java/org/apache/lucene/analysis/StopAnalyzer.java (revision 778214) +++ src/java/org/apache/lucene/analysis/StopAnalyzer.java (working copy) @@ -26,6 +26,8 @@ public final class StopAnalyzer extends Analyzer { private Set stopWords; + private boolean useDefaultStopPositionIncrement; + private boolean enablePositionIncrements; /** An array containing some common English words that are not usually useful for searching. */ @@ -37,39 +39,99 @@ "they", "this", "to", "was", "will", "with" }; - /** Builds an analyzer which removes words in ENGLISH_STOP_WORDS. */ + /** Builds an analyzer which removes words in + * ENGLISH_STOP_WORDS. + * @deprecated Use {@link #StopAnalyzer(boolean)} instead */ public StopAnalyzer() { stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS); + useDefaultStopPositionIncrement = true; } + /** Builds an analyzer which removes words in + * ENGLISH_STOP_WORDS. + * @param enablePositionIncrements See {@link + * StopFilter#setEnablePositionIncrements} */ + public StopAnalyzer(boolean enablePositionIncrements) { + stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS); + this.enablePositionIncrements = enablePositionIncrements; + } + /** Builds an analyzer with the stop words from the given set. - */ + * @deprecated Use {@link #StopAnalyzer(Set, boolean)} instead */ public StopAnalyzer(Set stopWords) { this.stopWords = stopWords; + useDefaultStopPositionIncrement = true; } - /** Builds an analyzer which removes words in the provided array. */ + /** Builds an analyzer with the stop words from the given set. + * @param stopWords Set of stop words + * @param enablePositionIncrements See {@link + * StopFilter#setEnablePositionIncrements} */ + public StopAnalyzer(Set stopWords, boolean enablePositionIncrements) { + this.stopWords = stopWords; + this.enablePositionIncrements = enablePositionIncrements; + } + + /** Builds an analyzer which removes words in the provided array. + * @deprecated Use {@link #StopAnalyzer(String[], boolean)} instead */ public StopAnalyzer(String[] stopWords) { this.stopWords = StopFilter.makeStopSet(stopWords); + useDefaultStopPositionIncrement = true; } + /** Builds an analyzer which removes words in the provided array. + * @param stopWords Array of stop words + * @param enablePositionIncrements See {@link + * StopFilter#setEnablePositionIncrements} */ + public StopAnalyzer(String[] stopWords, boolean enablePositionIncrements) { + this.stopWords = StopFilter.makeStopSet(stopWords); + this.enablePositionIncrements = enablePositionIncrements; + } + /** Builds an analyzer with the stop words from the given file. * @see WordlistLoader#getWordSet(File) - */ + * @deprecated Use {@link #StopAnalyzer(File, boolean)} instead */ public StopAnalyzer(File stopwordsFile) throws IOException { stopWords = WordlistLoader.getWordSet(stopwordsFile); + useDefaultStopPositionIncrement = true; } + /** Builds an analyzer with the stop words from the given file. + * @see WordlistLoader#getWordSet(File) + * @param stopwordsFile File to load stop words from + * @param enablePositionIncrements See {@link + * StopFilter#setEnablePositionIncrements} */ + public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException { + stopWords = WordlistLoader.getWordSet(stopwordsFile); + this.enablePositionIncrements = enablePositionIncrements; + } + /** Builds an analyzer with the stop words from the given reader. * @see WordlistLoader#getWordSet(Reader) + * @deprecated Use {@link #StopAnalyzer(Reader, boolean)} instead */ public StopAnalyzer(Reader stopwords) throws IOException { stopWords = WordlistLoader.getWordSet(stopwords); + useDefaultStopPositionIncrement = true; } + /** Builds an analyzer with the stop words from the given reader. + * @see WordlistLoader#getWordSet(Reader) + * @param stopwords Reader to load stop words from + * @param enablePositionIncrements See {@link + * StopFilter#setEnablePositionIncrements} */ + public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException { + stopWords = WordlistLoader.getWordSet(stopwords); + this.enablePositionIncrements = enablePositionIncrements; + } + /** Filters LowerCaseTokenizer with StopFilter. */ public TokenStream tokenStream(String fieldName, Reader reader) { - return new StopFilter(new LowerCaseTokenizer(reader), stopWords); + if (useDefaultStopPositionIncrement) { + return new StopFilter(new LowerCaseTokenizer(reader), stopWords); + } else { + return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords); + } } /** Filters LowerCaseTokenizer with StopFilter. */ @@ -82,7 +144,11 @@ if (streams == null) { streams = new SavedStreams(); streams.source = new LowerCaseTokenizer(reader); - streams.result = new StopFilter(streams.source, stopWords); + if (useDefaultStopPositionIncrement) { + streams.result = new StopFilter(streams.source, stopWords); + } else { + streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords); + } setPreviousTokenStream(streams); } else streams.source.reset(reader);