Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java (working copy) @@ -22,13 +22,10 @@ /** * Factory for {@link KeywordRepeatFilter}. - *
- * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
- *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- *     <filter class="solr.KeywordRepeatFilter"/>
- *   </analyzer>
- * </fieldType>
+ * + * Since {@link KeywordRepeatFilter} emits two tokens for every input token, any tokens that aren't transformed + * later in the analysis chain will be in the document twice. Therefore, consider adding + * {@link RemoveDuplicatesTokenFilterFactory} later in the analysis chain. */ public final class KeywordRepeatFilterFactory extends TokenFilterFactory { @Override Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (working copy) @@ -37,6 +37,18 @@ *
  • For other languages, see {@link LowerCaseFilter}. * *

    + * + *

    + * Note: This filter is aware of the {@link KeywordAttribute}. To prevent + * certain terms from being passed to the stemmer + * {@link KeywordAttribute#isKeyword()} should be set to true + * in a previous {@link TokenStream}. + * + * Note: For including the original term as well as the stemmed version, see + * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory} + *

    + * + * */ public final class SnowballFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (working copy) @@ -30,6 +30,18 @@ /** * TokenFilter that uses hunspell affix rules and words to stem tokens. Since hunspell supports a word having multiple * stems, this filter can emit multiple tokens for each consumed token + * + *

    + * Note: This filter is aware of the {@link KeywordAttribute}. To prevent + * certain terms from being passed to the stemmer + * {@link KeywordAttribute#isKeyword()} should be set to true + * in a previous {@link TokenStream}. + * + * Note: For including the original term as well as the stemmed version, see + * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory} + *

    + * + * */ public final class HunspellStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java (working copy) @@ -48,6 +48,9 @@ certain terms from being passed to the stemmer {@link KeywordAttribute#isKeyword()} should be set to true in a previous {@link TokenStream}. + + Note: For including the original term as well as the stemmed version, see + {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}

    */ public final class PorterStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java (working copy) @@ -32,6 +32,18 @@ * Conference on Research and Development in Information Retrieval, 191-203, 1993). *

    * All terms must already be lowercased for this filter to work correctly. + * + *

    + * Note: This filter is aware of the {@link KeywordAttribute}. To prevent + * certain terms from being passed to the stemmer + * {@link KeywordAttribute#isKeyword()} should be set to true + * in a previous {@link TokenStream}. + * + * Note: For including the original term as well as the stemmed version, see + * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory} + *

    + * + * */ public final class KStemFilter extends TokenFilter {