Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java (working copy) @@ -22,13 +22,10 @@ /** * Factory for {@link KeywordRepeatFilter}. - *
- * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"> - * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> - * <filter class="solr.KeywordRepeatFilter"/> - * </analyzer> - * </fieldType>+ * + * Since {@link KeywordRepeatFilter} emits two tokens for every input token, any tokens that aren't transformed + * later in the analysis chain will be in the document twice. Therefore, consider adding + * {@link RemoveDuplicatesTokenFilterFactory} later in the analysis chain. */ public final class KeywordRepeatFilterFactory extends TokenFilterFactory { @Override Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (revision 1454344) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (working copy) @@ -37,6 +37,18 @@ *
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer,
+ * {@link KeywordAttribute#isKeyword()} should be set to {@code true}
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ *
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer,
+ * {@link KeywordAttribute#isKeyword()} should be set to {@code true}
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ *
true
in a previous {@link TokenStream}.
+
+ Note: For including the original term as well as the stemmed version, see
+ {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
*/
public final class PorterStemFilter extends TokenFilter {
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java (revision 1454344)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java (working copy)
@@ -32,6 +32,18 @@
* Conference on Research and Development in Information Retrieval, 191-203, 1993).
*
* All terms must already be lowercased for this filter to work correctly.
+ *
+ *
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer,
+ * {@link KeywordAttribute#isKeyword()} should be set to {@code true}
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ *