Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; @@ -107,7 +107,7 @@ /** * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * {@link ArabicStemFilter}. * * @param matchVersion @@ -131,7 +131,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link LowerCaseFilter}, {@link StopFilter}, - * {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter} + * {@link ArabicNormalizationFilter}, {@link SetKeywordMarkerFilter} * if a stem exclusion set is provided and {@link ArabicStemFilter}. */ @Override @@ -144,7 +144,7 @@ // TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?! result = new ArabicNormalizationFilter(result); if(!stemExclusionSet.isEmpty()) { - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); } return new TokenStreamComponents(source, new ArabicStemFilter(result)); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc @link +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -29,10 +29,10 @@ * A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

- * @see KeywordMarkerFilter */ + * @see SetKeywordMarkerFilter */ public final class ArabicStemFilter extends TokenFilter { private final ArabicStemmer stemmer = new ArabicStemmer(); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -97,7 +97,7 @@ /** * Builds an analyzer with the given stop words and a stem exclusion set. - * If a stem exclusion set is provided this analyzer will add a {@link KeywordMarkerFilter} + * If a stem exclusion set is provided this analyzer will add a {@link SetKeywordMarkerFilter} * before {@link BulgarianStemFilter}. */ public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) { @@ -114,7 +114,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link BulgarianStemFilter}. */ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new BulgarianStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -131,7 +131,7 @@ result = new StandardFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(excltable != null && !excltable.isEmpty()) - result = new KeywordMarkerFilter(result, excltable); + result = new SetKeywordMarkerFilter(result, excltable); return new TokenStreamComponents(source, new BrazilianStemFilter(result)); } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,10 +30,10 @@ * A {@link TokenFilter} that applies {@link BrazilianStemmer}. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

- * @see KeywordMarkerFilter + * @see SetKeywordMarkerFilter * */ public final class BrazilianStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -97,7 +97,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -119,7 +119,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link ElisionFilter}, {@link LowerCaseFilter}, - * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is + * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -131,7 +131,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new CatalanStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (working copy) @@ -52,13 +52,7 @@ @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.buffer(); - final int length = termAtt.length(); - for (int i = 0; i < length;) { - i += Character.toChars( - Character.toLowerCase( - charUtils.codePointAt(buffer, i)), buffer, i); - } + charUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length()); return true; } else return false; Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import 
org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -115,7 +115,7 @@ * , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If * a stem exclusion set is provided via * {@link #CzechAnalyzer(Version, CharArraySet, CharArraySet)} a - * {@link KeywordMarkerFilter} is added before + * {@link SetKeywordMarkerFilter} is added before * {@link CzechStemFilter}. */ @Override @@ -126,7 +126,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter( matchVersion, result, stopwords); if(!this.stemExclusionTable.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionTable); + result = new SetKeywordMarkerFilter(result, stemExclusionTable); result = new CzechStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java (working copy) @@ -2,7 +2,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -29,12 +29,12 @@ * A {@link TokenFilter} that applies {@link CzechStemmer} to stem Czech words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

  * <p><b>NOTE</b>: Input is expected to be in lowercase,
  * but with diacritical marks</p>

- * @see KeywordMarkerFilter + * @see SetKeywordMarkerFilter */ public final class CzechStemFilter extends TokenFilter { private final CzechStemmer stemmer = new CzechStemmer(); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new DanishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -129,7 +129,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from a {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided, {@link GermanNormalizationFilter} and {@link GermanLightStemFilter} */ @Override @@ -139,7 +139,7 @@ TokenStream result = new StandardFilter(matchVersion, source); result = new LowerCaseFilter(matchVersion, result); result = new StopFilter( matchVersion, 
result, stopwords); - result = new KeywordMarkerFilter(result, exclusionSet); + result = new SetKeywordMarkerFilter(result, exclusionSet); result = new GermanNormalizationFilter(result); result = new GermanLightStemFilter(result); return new TokenStreamComponents(source, result); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -34,10 +34,10 @@ *

*

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

- * @see KeywordMarkerFilter + * @see SetKeywordMarkerFilter */ public final class GermanStemFilter extends TokenFilter { Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for javadoc +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*

Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (working copy) @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -73,7 +73,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -96,7 +96,7 @@ * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link EnglishPossessiveFilter}, * {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link PorterStemFilter}. */ @Override @@ -108,7 +108,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PorterStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * English words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -90,7 +90,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -112,7 +112,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SpanishLightStemFilter}. */ @Override @@ -123,7 +123,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SpanishLightStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -89,7 +89,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -111,7 +111,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new BasqueStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new FinnishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -133,7 +133,7 @@ * built from a {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link ElisionFilter}, * {@link LowerCaseFilter}, {@link StopFilter}, - * {@link KeywordMarkerFilter} if a stem exclusion set is + * {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided, and {@link FrenchLightStemFilter} */ @Override @@ -145,7 +145,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!excltable.isEmpty()) - result = new KeywordMarkerFilter(result, excltable); + result = new SetKeywordMarkerFilter(result, excltable); result = new FrenchLightStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -107,7 +107,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -129,7 +129,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link IrishLowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -144,7 +144,7 @@ result = new IrishLowerCaseFilter(result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new IrishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -89,7 +89,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -111,7 +111,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link GalicianStemFilter}. 
*/ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new GalicianStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Galician words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Galician words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import java.io.IOException; import java.io.Reader; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.StopwordAnalyzerBase; @@ -111,7 +111,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from a {@link StandardTokenizer} filtered with * {@link LowerCaseFilter}, {@link IndicNormalizationFilter}, - * {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter} + * {@link HindiNormalizationFilter}, {@link SetKeywordMarkerFilter} * if a stem exclusion set is provided, {@link HindiStemFilter}, and * Hindi Stop words */ @@ -121,7 +121,7 @@ final Tokenizer source = new StandardTokenizer(matchVersion, reader); TokenStream result = new LowerCaseFilter(matchVersion, source); if (!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new IndicNormalizationFilter(result); result = new HindiNormalizationFilter(result); result = new StopFilter(matchVersion, result, stopwords); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java (working copy) @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc @link +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -31,7 +31,7 @@ *

  * In some cases the normalization may cause unrelated terms to conflate, so
  * to prevent terms from being normalized use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

* @see HindiNormalizer Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new HungarianStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Hungarian words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

 */
Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
===================================================================
--- lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (revision 1455119)
+++ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java (working copy)
@@ -175,10 +175,7 @@
   @SuppressWarnings("unchecked")
   public List applyAffix(char strippedWord[], int length, HunspellAffix affix, int recursionDepth) {
     if(dictionary.isIgnoreCase()) {
-      for(int i=0;i
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; @@ -89,7 +89,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -111,7 +111,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link LatvianStemFilter}. */ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new LatvianStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
  * </p>

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java (working copy) @@ -22,41 +22,28 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.util.CharArraySet; /** - * Marks terms as keywords via the {@link KeywordAttribute}. Each token - * contained in the provided is marked as a keyword by setting - * {@link KeywordAttribute#setKeyword(boolean)} to true. + * Marks terms as keywords via the {@link KeywordAttribute}. * * @see KeywordAttribute */ -public final class KeywordMarkerFilter extends TokenFilter { +public abstract class KeywordMarkerFilter extends TokenFilter { private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final CharArraySet keywordSet; /** - * Create a new KeywordMarkerFilter, that marks the current token as a - * keyword if the tokens term buffer is contained in the given set via the - * {@link KeywordAttribute}. - * - * @param in - * TokenStream to filter - * @param keywordSet - * the keywords set to lookup the current termbuffer + * Creates a new {@link KeywordMarkerFilter} + * @param in the input stream */ - public KeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) { + protected KeywordMarkerFilter(TokenStream in) { super(in); - this.keywordSet = keywordSet; } @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) { + if (isKeyword()) { keywordAttr.setKeyword(true); } return true; @@ -64,4 +51,7 @@ return false; } } + + protected abstract boolean isKeyword(); + } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.regex.Pattern; import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; import org.apache.lucene.analysis.util.*; @@ -29,23 +30,30 @@ * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"> * <analyzer> * <tokenizer class="solr.WhitespaceTokenizerFactory"/> - * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" ignoreCase="false"/> + * <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/> * </analyzer> * </fieldType> * */ public class KeywordMarkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { public static final String PROTECTED_TOKENS = "protected"; + public static final String PATTERN = "pattern"; private CharArraySet protectedWords; private boolean 
ignoreCase; + private Pattern pattern; @Override public void inform(ResourceLoader loader) throws IOException { String wordFiles = args.get(PROTECTED_TOKENS); + String stringPattern = args.get(PATTERN); ignoreCase = getBoolean("ignoreCase", false); if (wordFiles != null) { protectedWords = getWordSet(loader, wordFiles, ignoreCase); } + if (stringPattern != null) { + pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern); + } + } public boolean isIgnoreCase() { @@ -54,6 +62,12 @@ @Override public TokenStream create(TokenStream input) { - return protectedWords == null ? input : new KeywordMarkerFilter(input, protectedWords); + if (pattern != null) { + input = new PatternKeywordMarkerFilter(input, pattern); + } + if (protectedWords != null) { + input = new SetKeywordMarkerFilter(input, protectedWords); + } + return input; } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (working copy) @@ -0,0 +1,57 @@ +package org.apache.lucene.analysis.miscellaneous; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +/** + * Marks terms as keywords via the {@link KeywordAttribute}. Each token + * that matches the provided pattern is marked as a keyword by setting + * {@link KeywordAttribute#setKeyword(boolean)} to true. + */ +public class PatternKeywordMarkerFilter extends KeywordMarkerFilter { + + private final Matcher matcher; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** + * Create a new {@link PatternKeywordMarkerFilter}, that marks the current + * token as a keyword if the tokens term buffer matches the provided + * {@link Pattern} via the {@link KeywordAttribute}. 
+ * + * @param in + * TokenStream to filter + * @param pattern + * the pattern to apply to the incoming term buffer + **/ + protected PatternKeywordMarkerFilter(TokenStream in, Pattern pattern) { + super(in); + this.matcher = pattern.matcher(""); + } + + @Override + protected boolean isKeyword() { + matcher.reset(termAtt); + return matcher.matches(); + } + +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Date Author Id Revision HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (working copy) @@ -0,0 +1,52 @@ +package org.apache.lucene.analysis.miscellaneous; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; +import org.apache.lucene.analysis.util.CharArraySet; + +/** + * Marks terms as keywords via the {@link KeywordAttribute}. Each token + * contained in the provided set is marked as a keyword by setting + * {@link KeywordAttribute#setKeyword(boolean)} to true. + */ +public final class SetKeywordMarkerFilter extends KeywordMarkerFilter { + private final CharArraySet keywordSet; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** + * Create a new KeywordSetMarkerFilter, that marks the current token as a + * keyword if the tokens term buffer is contained in the given set via the + * {@link KeywordAttribute}. 
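A corresponding sketch for the set-based variant (again hypothetical and not part of the patch; it mirrors what the analyzers further down do with their stem exclusion sets and assumes the same Lucene 4.x classes):

import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// Hypothetical helper: terms contained in the set keep their surface form.
TokenStream protectListedTerms(String text) {
  CharArraySet keywords = new CharArraySet(Version.LUCENE_42, Arrays.asList("indexing"), true);
  Tokenizer tok = new WhitespaceTokenizer(Version.LUCENE_42, new StringReader(text));
  TokenStream ts = new SetKeywordMarkerFilter(tok, keywords); // marks "indexing"
  return new PorterStemFilter(ts); // "indexing documents" comes out as "indexing", "document"
}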
+ * + * @param in + * TokenStream to filter + * @param keywordSet + * the keywords set to lookup the current termbuffer + */ + public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) { + super(in); + this.keywordSet = keywordSet; + } + + @Override + protected boolean isKeyword() { + return keywordSet.contains((CharSequence) termAtt); + } + +} Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (revision 0) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java (working copy) Property changes on: lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Date Author Id Revision HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (working copy) @@ -20,7 +20,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; @@ -129,7 +129,7 @@ * * @return A {@link TokenStream} built from a {@link StandardTokenizer} * filtered with {@link StandardFilter}, {@link LowerCaseFilter}, - * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem exclusion set is provided, + * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem exclusion set is provided, * {@link StemmerOverrideFilter}, and {@link SnowballFilter} */ @Override @@ -140,7 +140,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stoptable); if (!excltable.isEmpty()) - result = new KeywordMarkerFilter(result, excltable); + result = new SetKeywordMarkerFilter(result, excltable); if (!stemdict.isEmpty()) result = new StemmerOverrideFilter(matchVersion, result, stemdict); result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer()); Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import 
org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new NorwegianStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -90,7 +90,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -112,7 +112,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link PortugueseLightStemFilter}. */ @Override @@ -123,7 +123,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new PortugueseLightStemFilter(result); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Portuguese words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Portuguese words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * Portuguese words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -94,7 +94,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -116,7 +116,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. */ @Override @@ -127,7 +127,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new RomanianStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (working copy) @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.IOUtils; @@ -111,7 +111,7 @@ * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from a {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided, and {@link SnowballFilter} */ @Override @@ -122,7 +122,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if (!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer()); return new 
TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java (working copy) @@ -20,7 +20,7 @@ import java.util.Map; import java.io.IOException; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -74,7 +74,7 @@ } if (protectedWords != null) - input = new KeywordMarkerFilter(input, protectedWords); + input = new SetKeywordMarkerFilter(input, protectedWords); return new SnowballFilter(input, program); } } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -91,7 +91,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -113,7 +113,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - * , {@link KeywordMarkerFilter} if a stem exclusion set is + * , {@link SetKeywordMarkerFilter} if a stem exclusion set is * provided and {@link SnowballFilter}. 
*/ @Override @@ -124,7 +124,7 @@ result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new SwedishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; @@ -30,7 +30,7 @@ * words. *

* To prevent terms from being stemmed use an instance of - * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets * the {@link KeywordAttribute} before this {@link TokenStream}. *

*/ Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (working copy) @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -93,7 +93,7 @@ /** * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is - * provided this analyzer will add a {@link KeywordMarkerFilter} before + * provided this analyzer will add a {@link SetKeywordMarkerFilter} before * stemming. * * @param matchVersion lucene compatibility version @@ -115,7 +115,7 @@ * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} * built from an {@link StandardTokenizer} filtered with * {@link StandardFilter}, {@link TurkishLowerCaseFilter}, - * {@link StopFilter}, {@link KeywordMarkerFilter} if a stem + * {@link StopFilter}, {@link SetKeywordMarkerFilter} if a stem * exclusion set is provided and {@link SnowballFilter}. */ @Override @@ -126,7 +126,7 @@ result = new TurkishLowerCaseFilter(result); result = new StopFilter(matchVersion, result, stopwords); if(!stemExclusionSet.isEmpty()) - result = new KeywordMarkerFilter(result, stemExclusionSet); + result = new SetKeywordMarkerFilter(result, stemExclusionSet); result = new SnowballFilter(result, new TurkishStemmer()); return new TokenStreamComponents(source, result); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (working copy) @@ -131,8 +131,26 @@ } return new CharacterBuffer(new char[bufferSize], 0, 0); } - + + /** + * Converts each Unicode codepoint to lower case via {@link Character#toLowerCase(int)} starting + * at the given offset. + * @param buffer the char buffer to lowercase + * @param offset the offset to start at + * @param limit the max char in the buffer to lower case + */ + public void toLowerCase(final char[] buffer, final int offset, final int limit) { + assert buffer.length >= limit; + assert offset >= 0 && offset <= buffer.length; + for (int i = offset; i < limit;) { + i += Character.toChars( + Character.toLowerCase( + codePointAt(buffer, i)), buffer, i); + } + } + + /** * Fills the {@link CharacterBuffer} with characters read from the given * reader {@link Reader}. 
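A tiny sketch of the new CharacterUtils helper in use (hypothetical, not part of the patch; getInstance(Version) is the existing factory method, and Version.LUCENE_42 is assumed):

import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;

char[] buf = "Äbc\uD801\uDC00".toCharArray(); // mixed BMP and supplementary characters
CharacterUtils.getInstance(Version.LUCENE_42).toLowerCase(buf, 0, buf.length);
// the loop advances by codepoint, so surrogate pairs are lower-cased as whole characters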
This method tries to read as many characters into * the {@link CharacterBuffer} as possible, each call to fill will start Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (revision 1455119) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (working copy) @@ -215,12 +215,9 @@ * The user should never modify this text array after calling this method. */ public V put(char[] text, V value) { - if (ignoreCase) - for(int i=0;i>addAll(oddlyNamedComponents, ReversePathHierarchyTokenizer.class, // this is supported via an option to PathHierarchyTokenizer's factory - SnowballFilter.class // this is called SnowballPorterFilterFactory + SnowballFilter.class, // this is called SnowballPorterFilterFactory + PatternKeywordMarkerFilter.class, + SetKeywordMarkerFilter.class ); } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; /** @@ -281,7 +281,7 @@ public void testWithKeywordAttribute() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("hole"); - CzechStemFilter filter = new CzechStemFilter(new KeywordMarkerFilter( + CzechStemFilter filter = new CzechStemFilter(new SetKeywordMarkerFilter( new MockTokenizer(new StringReader("hole desek"), MockTokenizer.WHITESPACE, false), set)); assertTokenStreamContents(filter, new String[] { "hole", "desk" }); } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.core.LowerCaseTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; public class TestGermanAnalyzer extends BaseTokenStreamTestCase { @@ -38,7 +38,7 @@ CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("fischen"); GermanStemFilter filter = new GermanStemFilter( - new KeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader( + new SetKeywordMarkerFilter(new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader( "Fischen Trinken")), set)); assertTokenStreamContents(filter, new String[] { "fischen", "trink" }); } Index: 
lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -55,7 +55,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GermanLightStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -62,7 +62,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GermanMinimalStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -68,7 +68,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = 
new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GermanStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (working copy) @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; @@ -57,7 +57,7 @@ CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.add("yourselves"); Tokenizer tokenizer = new MockTokenizer(new StringReader("yourselves yours"), MockTokenizer.WHITESPACE, false); - TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); + TokenStream filter = new PorterStemFilter(new SetKeywordMarkerFilter(tokenizer, set)); assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -55,7 +55,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new FinnishLightStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -185,7 +185,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, 
Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new FrenchLightStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -64,7 +64,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new FrenchMinimalStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; /** @@ -59,7 +59,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new GalicianMinimalStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java (working copy) @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static 
org.apache.lucene.analysis.VocabularyAssert.*; @@ -55,7 +55,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new HungarianLightStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -63,7 +63,7 @@ // assert with keywork marker tokenizer = new MockTokenizer(new StringReader("lucene is awesome"), MockTokenizer.WHITESPACE, true); CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true); - filter = new HunspellStemFilter(new KeywordMarkerFilter(tokenizer, set), DICTIONARY); + filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), DICTIONARY); assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1}); } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (working copy) @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.StringReader; import java.util.Locale; +import java.util.regex.Pattern; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; @@ -36,34 +37,68 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase { @Test - public void testIncrementToken() throws IOException { + public void testSetFilterIncrementToken() throws IOException { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 5, true); set.add("lucenefox"); String[] output = new String[] { "the", "quick", "brown", "LuceneFox", "jumps" }; assertTokenStreamContents(new LowerCaseFilterMock( - new KeywordMarkerFilter(new MockTokenizer(new StringReader( + new SetKeywordMarkerFilter(new MockTokenizer(new StringReader( "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set)), output); CharArraySet mixedCaseSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("LuceneFox"), false); assertTokenStreamContents(new LowerCaseFilterMock( - new KeywordMarkerFilter(new MockTokenizer(new StringReader( + new SetKeywordMarkerFilter(new MockTokenizer(new StringReader( "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), mixedCaseSet)), output); CharArraySet set2 = set; assertTokenStreamContents(new 
LowerCaseFilterMock( - new KeywordMarkerFilter(new MockTokenizer(new StringReader( + new SetKeywordMarkerFilter(new MockTokenizer(new StringReader( "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), set2)), output); } + + @Test + public void testPatternFilterIncrementToken() throws IOException { + String[] output = new String[] { "the", "quick", "brown", "LuceneFox", + "jumps" }; + assertTokenStreamContents(new LowerCaseFilterMock( + new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader( + "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[fF]ox"))), output); + + output = new String[] { "the", "quick", "brown", "lucenefox", + "jumps" }; + + assertTokenStreamContents(new LowerCaseFilterMock( + new PatternKeywordMarkerFilter(new MockTokenizer(new StringReader( + "The quIck browN LuceneFox Jumps"), MockTokenizer.WHITESPACE, false), Pattern.compile("[a-zA-Z]+[f]ox"))), output); + } // LUCENE-2901 public void testComposition() throws Exception { TokenStream ts = new LowerCaseFilterMock( - new KeywordMarkerFilter( - new KeywordMarkerFilter( + new SetKeywordMarkerFilter( + new SetKeywordMarkerFilter( new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false), new CharArraySet(TEST_VERSION_CURRENT, asSet("Birds", "Houses"), false)), new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false))); assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" }); + + ts = new LowerCaseFilterMock( + new PatternKeywordMarkerFilter( + new PatternKeywordMarkerFilter( + new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false), + Pattern.compile("Birds|Houses")), + Pattern.compile("Dogs|Trees"))); + + assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" }); + + ts = new LowerCaseFilterMock( + new SetKeywordMarkerFilter( + new PatternKeywordMarkerFilter( + new MockTokenizer(new StringReader("Dogs Trees Birds Houses"), MockTokenizer.WHITESPACE, false), + Pattern.compile("Birds|Houses")), + new CharArraySet(TEST_VERSION_CURRENT, asSet("Dogs", "Trees"), false))); + + assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" }); } public static final class LowerCaseFilterMock extends TokenFilter { Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilterFactory.java (working copy) @@ -35,6 +35,7 @@ * Simple tests to ensure the keyword marker filter factory is working. 
*/ public class TestKeywordMarkerFilterFactory extends BaseTokenStreamTestCase { + public void testKeywords() throws IOException { Reader reader = new StringReader("dogs cats"); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); @@ -48,8 +49,38 @@ TokenStream ts = new PorterStemFilter(factory.create(tokenizer)); assertTokenStreamContents(ts, new String[] { "dog", "cats" }); + + + reader = new StringReader("dogs cats"); + tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + factory = new KeywordMarkerFilterFactory(); + args = new HashMap(); + + args.put("pattern", "cats|Dogs"); + factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); + factory.init(args); + factory.inform(null); + + ts = new PorterStemFilter(factory.create(tokenizer)); + assertTokenStreamContents(ts, new String[] { "dog", "cats" }); } + public void testKeywordsMixed() throws IOException { + Reader reader = new StringReader("dogs cats birds"); + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory(); + Map args = new HashMap(); + ResourceLoader loader = new StringMockResourceLoader("cats"); + args.put("protected", "protwords.txt"); + args.put("pattern", "birds|Dogs"); + factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); + factory.init(args); + factory.inform(loader); + + TokenStream ts = new PorterStemFilter(factory.create(tokenizer)); + assertTokenStreamContents(ts, new String[] { "dog", "cats", "birds" }); + } + public void testKeywordsCaseInsensitive() throws IOException { Reader reader = new StringReader("dogs cats Cats"); Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); @@ -64,5 +95,36 @@ TokenStream ts = new PorterStemFilter(factory.create(tokenizer)); assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" }); + + reader = new StringReader("dogs cats Cats"); + tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + factory = new KeywordMarkerFilterFactory(); + args = new HashMap(); + + args.put("pattern", "Cats"); + args.put("ignoreCase", "true"); + factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); + factory.init(args); + factory.inform(null); + + ts = new PorterStemFilter(factory.create(tokenizer)); + assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats" }); } + + public void testKeywordsCaseInsensitiveMixed() throws IOException { + Reader reader = new StringReader("dogs cats Cats Birds birds"); + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory(); + Map args = new HashMap(); + ResourceLoader loader = new StringMockResourceLoader("cats"); + args.put("protected", "protwords.txt"); + args.put("pattern", "birds"); + args.put("ignoreCase", "true"); + factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); + factory.init(args); + factory.inform(loader); + + TokenStream ts = new PorterStemFilter(factory.create(tokenizer)); + assertTokenStreamContents(ts, new String[] { "dog", "cats", "Cats", "Birds", "birds" }); + } } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java 
(working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -57,7 +57,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new NorwegianLightStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.util.CharArraySet; import static org.apache.lucene.analysis.VocabularyAssert.*; @@ -57,7 +57,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new NorwegianMinimalStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (revision 1455119) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (working copy) @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.CharArraySet; @@ -103,7 +103,7 @@ @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - TokenStream sink = new KeywordMarkerFilter(source, exclusionSet); + TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet); return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink)); } }; Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java =================================================================== 
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (revision 1455119)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (working copy)
@@ -28,7 +28,7 @@
 import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 
@@ -77,7 +77,7 @@
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(sink));
       }
     };
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (revision 1455119)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (working copy)
@@ -29,7 +29,7 @@
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.util.CharArraySet;
 
@@ -76,7 +76,7 @@
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new PortugueseStemFilter(sink));
       }
     };
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (revision 1455119)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (working copy)
@@ -26,7 +26,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -55,7 +55,7 @@
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new RussianLightStemFilter(sink));
       }
     };
Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
===================================================================
--- lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (revision 1455119)
+++ lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (working copy)
@@ -26,7 +26,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
@@ -55,7 +55,7 @@
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new SwedishLightStemFilter(sink));
       }
     };
Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java
===================================================================
--- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java (revision 1455119)
+++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java (working copy)
@@ -22,7 +22,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 
@@ -32,7 +32,7 @@
  * This acts as a lemmatizer for verbs and adjectives.
  * <p>
  * To prevent terms from being stemmed use an instance of
- * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
  * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 */
Index: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java
===================================================================
--- lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java (revision 1455119)
+++ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java (working copy)
@@ -35,7 +35,7 @@
 * </p>
 * <p>
 * In order to prevent terms from being stemmed, use an instance of
- * {@link org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter}
+ * {@link org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter}
 * or a custom {@link TokenFilter} that sets the {@link KeywordAttribute}
 * before this {@link TokenStream}.
 * </p>
Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
===================================================================
--- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (revision 1455119)
+++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (working copy)
@@ -25,7 +25,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
@@ -49,7 +49,7 @@
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new JapaneseTokenizer(reader, null, true, JapaneseTokenizer.DEFAULT_MODE);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
       }
     };
Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
===================================================================
--- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (revision 1455119)
+++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 
 import java.io.IOException;
@@ -70,7 +70,7 @@
       @Override
       protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-        TokenStream sink = new KeywordMarkerFilter(source, exclusionSet);
+        TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
         return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
       }
     };
Index: lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
===================================================================
--- lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (revision 1455119)
+++ lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (working copy)
@@ -23,7 +23,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
@@ -112,7 +112,7 @@
   /**
    * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
-   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
    * stemming.
    * 
    * @param matchVersion lucene compatibility version
@@ -135,7 +135,7 @@
    *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
-   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         , {@link SetKeywordMarkerFilter} if a stem exclusion set is
    *         provided and {@link StempelFilter}.
    */
   @Override
@@ -146,7 +146,7 @@
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
-      result = new KeywordMarkerFilter(result, stemExclusionSet);
+      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
     result = new StempelFilter(result, new StempelStemmer(stemTable));
     return new TokenStreamComponents(source, result);
   }
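For context, the usage pattern this patch updates throughout is: mark exclusion-set terms with SetKeywordMarkerFilter so that a downstream KeywordAttribute-aware stem filter leaves them untouched. The following is a minimal sketch of that chain outside the patched files, not part of this patch; it assumes a Lucene 4.2-era API (Version.LUCENE_42, StandardTokenizer, PorterStemFilter), and the class name and the example protected terms are hypothetical.

import java.io.Reader;
import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

// Hypothetical example, not part of this patch: terms in "protectedTerms"
// get KeywordAttribute set, so the stemmer below passes them through unchanged.
public final class ProtectedTermAnalyzer extends Analyzer {
  private static final Version MATCH_VERSION = Version.LUCENE_42; // assumed compatibility version
  private final CharArraySet protectedTerms =
      new CharArraySet(MATCH_VERSION, Arrays.asList("lucene", "solr"), true);

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new StandardTokenizer(MATCH_VERSION, reader);
    // The marker filter must sit upstream of the stemming filter.
    TokenStream sink = new SetKeywordMarkerFilter(source, protectedTerms);
    return new TokenStreamComponents(source, new PorterStemFilter(sink));
  }
}

The ordering is the same one the analyzers above enforce: SetKeywordMarkerFilter before the stemmer, since stem filters only consult KeywordAttribute after it has been set.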