Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java (working copy) @@ -22,11 +22,17 @@ /** * Factory for {@link KeywordRepeatFilter}. + * + * Since {@link KeywordRepeatFilter} emits two tokens for every input token, any tokens that aren't transformed + * later in the analysis chain will be in the document twice. Therefore, consider adding + * {@link RemoveDuplicatesTokenFilterFactory} later in the analysis chain, e.g. *
  * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
  *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  *     <filter class="solr.KeywordRepeatFilter"/>
+ *     <filter class="solr.PorterStemFilterFactory"/>
+ *     <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  *   </analyzer>
  * </fieldType>
*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (working copy) @@ -37,6 +37,18 @@ *
  • For other languages, see {@link LowerCaseFilter}. * *

    + * + *

    + * Note: This filter is aware of the {@link KeywordAttribute}. To prevent + * certain terms from being passed to the stemmer + * {@link KeywordAttribute#isKeyword()} should be set to true + * in a previous {@link TokenStream}. + * + * Note: For including the original term as well as the stemmed version, see + * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory} + *

    + * + * */ public final class SnowballFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (working copy) @@ -30,6 +30,18 @@ /** * TokenFilter that uses hunspell affix rules and words to stem tokens. Since hunspell supports a word having multiple * stems, this filter can emit multiple tokens for each consumed token + * + *

    + * Note: This filter is aware of the {@link KeywordAttribute}. To prevent + * certain terms from being passed to the stemmer + * {@link KeywordAttribute#isKeyword()} should be set to true + * in a previous {@link TokenStream}. + * + * Note: For including the original term as well as the stemmed version, see + * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory} + *

    + * + * */ public final class HunspellStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java (working copy) @@ -48,6 +48,9 @@ certain terms from being passed to the stemmer {@link KeywordAttribute#isKeyword()} should be set to true in a previous {@link TokenStream}. + + Note: For including the original term as well as the stemmed version, see + {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}

    */ public final class PorterStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java (working copy) @@ -32,6 +32,18 @@ * Conference on Research and Development in Information Retrieval, 191-203, 1993). *

    * All terms must already be lowercased for this filter to work correctly. + * + *

    + * Note: This filter is aware of the {@link KeywordAttribute}. To prevent + * certain terms from being passed to the stemmer + * {@link KeywordAttribute#isKeyword()} should be set to true + * in a previous {@link TokenStream}. + * + * Note: For including the original term as well as the stemmed version, see + * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory} + *

    + * + * */ public final class KStemFilter extends TokenFilter {